teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +315 -2
- teradataml/__init__.py +4 -0
- teradataml/_version.py +1 -1
- teradataml/analytics/analytic_function_executor.py +95 -8
- teradataml/analytics/byom/__init__.py +1 -1
- teradataml/analytics/json_parser/metadata.py +12 -3
- teradataml/analytics/json_parser/utils.py +7 -2
- teradataml/analytics/sqle/__init__.py +5 -1
- teradataml/analytics/table_operator/__init__.py +1 -1
- teradataml/analytics/uaf/__init__.py +1 -1
- teradataml/analytics/utils.py +4 -0
- teradataml/analytics/valib.py +18 -4
- teradataml/automl/__init__.py +51 -6
- teradataml/automl/data_preparation.py +59 -35
- teradataml/automl/data_transformation.py +58 -33
- teradataml/automl/feature_engineering.py +27 -12
- teradataml/automl/model_training.py +73 -46
- teradataml/common/constants.py +88 -29
- teradataml/common/garbagecollector.py +2 -1
- teradataml/common/messagecodes.py +19 -3
- teradataml/common/messages.py +6 -1
- teradataml/common/sqlbundle.py +64 -12
- teradataml/common/utils.py +246 -47
- teradataml/common/warnings.py +11 -0
- teradataml/context/context.py +161 -27
- teradataml/data/amazon_reviews_25.csv +26 -0
- teradataml/data/byom_example.json +11 -0
- teradataml/data/dataframe_example.json +18 -2
- teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
- teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
- teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
- teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
- teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
- teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/Shap.py +34 -6
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
- teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
- teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
- teradataml/data/hnsw_alter_data.csv +5 -0
- teradataml/data/hnsw_data.csv +10 -0
- teradataml/data/jsons/byom/h2opredict.json +1 -1
- teradataml/data/jsons/byom/onnxembeddings.json +266 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
- teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
- teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
- teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
- teradataml/data/medical_readings.csv +101 -0
- teradataml/data/patient_profile.csv +101 -0
- teradataml/data/scripts/lightgbm/dataset.template +157 -0
- teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
- teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
- teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
- teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
- teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
- teradataml/data/target_udt_data.csv +8 -0
- teradataml/data/templates/open_source_ml.json +3 -2
- teradataml/data/teradataml_example.json +8 -0
- teradataml/data/vectordistance_example.json +4 -0
- teradataml/dataframe/copy_to.py +8 -3
- teradataml/dataframe/data_transfer.py +11 -1
- teradataml/dataframe/dataframe.py +1049 -285
- teradataml/dataframe/dataframe_utils.py +152 -20
- teradataml/dataframe/functions.py +578 -35
- teradataml/dataframe/setop.py +11 -6
- teradataml/dataframe/sql.py +185 -16
- teradataml/dbutils/dbutils.py +1049 -115
- teradataml/dbutils/filemgr.py +48 -1
- teradataml/hyperparameter_tuner/optimizer.py +12 -1
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/__init__.py +1 -1
- teradataml/opensource/_base.py +1466 -0
- teradataml/opensource/_class.py +464 -0
- teradataml/opensource/{sklearn/constants.py → _constants.py} +21 -14
- teradataml/opensource/_lightgbm.py +949 -0
- teradataml/opensource/_sklearn.py +1008 -0
- teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +5 -6
- teradataml/options/__init__.py +54 -38
- teradataml/options/configure.py +131 -27
- teradataml/options/display.py +13 -2
- teradataml/plot/axis.py +47 -8
- teradataml/plot/figure.py +33 -0
- teradataml/plot/plot.py +63 -13
- teradataml/scriptmgmt/UserEnv.py +5 -5
- teradataml/scriptmgmt/lls_utils.py +130 -40
- teradataml/store/__init__.py +12 -0
- teradataml/store/feature_store/__init__.py +0 -0
- teradataml/store/feature_store/constants.py +291 -0
- teradataml/store/feature_store/feature_store.py +2318 -0
- teradataml/store/feature_store/models.py +1505 -0
- teradataml/table_operators/Apply.py +32 -18
- teradataml/table_operators/Script.py +3 -1
- teradataml/table_operators/TableOperator.py +3 -1
- teradataml/table_operators/query_generator.py +3 -0
- teradataml/table_operators/table_operator_query_generator.py +3 -1
- teradataml/table_operators/table_operator_util.py +37 -38
- teradataml/table_operators/templates/dataframe_register.template +69 -0
- teradataml/utils/dtypes.py +51 -2
- teradataml/utils/internal_buffer.py +18 -0
- teradataml/utils/validators.py +99 -8
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/METADATA +321 -5
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/RECORD +121 -94
- teradataml/libaed_0_1.dylib +0 -0
- teradataml/libaed_0_1.so +0 -0
- teradataml/opensource/sklearn/__init__.py +0 -1
- teradataml/opensource/sklearn/_class.py +0 -255
- teradataml/opensource/sklearn/_sklearn_wrapper.py +0 -1800
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/zip-safe +0 -0
|
@@ -0,0 +1,2318 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Copyright (c) 2024 by Teradata Corporation. All rights reserved.
|
|
3
|
+
TERADATA CORPORATION CONFIDENTIAL AND TRADE SECRET
|
|
4
|
+
|
|
5
|
+
Primary Owner: pradeep.garre@teradata.com
|
|
6
|
+
Secondary Owner: adithya.avvaru@teradata.com
|
|
7
|
+
|
|
8
|
+
This file implements the core framework that allows user to use Teradata Enterprise Feature Store.
|
|
9
|
+
"""
|
|
10
|
+
|
|
11
|
+
from sqlalchemy import literal_column
|
|
12
|
+
from teradataml.context.context import get_connection
|
|
13
|
+
from teradataml.common.constants import SQLConstants
|
|
14
|
+
from teradataml.common.exceptions import TeradataMlException
|
|
15
|
+
from teradataml.common.messages import Messages
|
|
16
|
+
from teradataml.common.messagecodes import MessageCodes
|
|
17
|
+
from teradataml.dataframe.sql import _SQLColumnExpression as Col
|
|
18
|
+
from teradataml.dbutils.dbutils import _create_database, _create_table, db_drop_table, execute_sql, Grant, Revoke, _update_data, _delete_data, db_transaction
|
|
19
|
+
from teradataml.store.feature_store.constants import *
|
|
20
|
+
from teradataml.store.feature_store.models import *
|
|
21
|
+
from teradataml.common.sqlbundle import SQLBundle
|
|
22
|
+
from teradataml.utils.validators import _Validators
|
|
23
|
+
|
|
24
|
+
|
|
25
|
+
class FeatureStore:
|
|
26
|
+
"""Class for FeatureStore."""
|
|
27
|
+
|
|
28
|
+
def __init__(self, repo):
    """
    DESCRIPTION:
        Method to create FeatureStore in teradataml.

    PARAMETERS:
        repo:
            Required Argument.
            Specifies the repository name.
            Types: str.

    RETURNS:
        Object of FeatureStore.

    RAISES:
        None

    EXAMPLES:
        >>> # Create FeatureStore for repository 'vfs_v1'.
        >>> from teradataml import FeatureStore
        >>> fs = FeatureStore('vfs_v1')
        >>> fs
        FeatureStore(vfs_v1)-v1.0
        >>>
    """
    # Only the type of "repo" is validated here. Checking that the repo
    # actually exists would need a network round trip, so it is skipped.
    _Validators._validate_function_arguments(
        [["repo", repo, False, (str), True]])

    self.__repo = repo
    self.__version = ""

    # SQLBundle is used by the other APIs to build SQL statements.
    self.__sql_bundle = SQLBundle()

    # Cache of DataFrames keyed by object kind, so each DataFrame is
    # built only once and reused afterwards.
    self.__df_container = {}

    # Keep the repository table names handy for the other APIs.
    self.__table_names = EFS_TABLES

    # Lazily evaluated accessors for the corresponding DataFrames.
    self.__get_features_df = lambda: self.__get_obj_df("feature")
    self.__get_archived_features_df = lambda: self.__get_obj_df("feature_staging")
    self.__get_group_features_df = lambda: self.__get_obj_df("group_features")
    self.__get_archived_group_features_df = lambda: self.__get_obj_df("group_features_staging")
    self.__get_feature_group_df = lambda: self.__get_obj_df("feature_group")
    self.__get_archived_feature_group_df = lambda: self.__get_obj_df("feature_group_staging")
    self.__get_entity_df = lambda: self.__get_obj_df("entity")
    self.__get_archived_entity_df = lambda: self.__get_obj_df("entity_staging")
    self.__get_data_source_df = lambda: self.__get_obj_df("data_source")
    self.__get_archived_data_source_df = lambda: self.__get_obj_df("data_source_staging")

    # Status values reported by the repair related internal methods.
    self.__good_status = "Good"
    self.__bad_status = "Bad"
    self.__repaired_status = "Repaired"
|
87
|
+
@property
def repo(self):
    """
    DESCRIPTION:
        Get the repository.

    PARAMETERS:
        None

    RETURNS:
        str

    RAISES:
        None

    EXAMPLES:
        >>> from teradataml import FeatureStore
        >>> fs = FeatureStore('vfs_v1')
        >>> fs.repo
        vfs_v1
        >>>
    """
    # Simple accessor for the private repository name.
    return self.__repo
|
111
|
+
@repo.setter
def repo(self, value):
    """
    DESCRIPTION:
        Set the repository.

    PARAMETERS:
        value:
            Required Argument.
            Specifies the repository name.
            Types: str.

    RETURNS:
        None.

    RAISES:
        None

    EXAMPLES:
        # Example 1: Create a FeatureStore for repository 'abc' and
        # then change the repository to 'xyz'.
        >>> from teradataml import FeatureStore
        >>> fs = FeatureStore('abc')
        >>> fs.repo = 'xyz'
        >>>
    """
    argument_validation_params = []
    argument_validation_params.append(["value", value, False, (str), True])

    # Validate argument types
    _Validators._validate_function_arguments(argument_validation_params)

    # Remove all cached DataFrames so subsequent API calls automatically
    # point to the new repo.
    self.__df_container.clear()
    # Reset the cached version too; it is re-fetched lazily.
    # Consistency fix: use "" like __init__ does (was None; both are
    # falsy, so __get_version behaves the same either way).
    self.__version = ""

    # Set the repo value.
    self.__repo = value
|
150
|
+
def __repr__(self):
    """
    DESCRIPTION:
        String representation for FeatureStore object.

    PARAMETERS:
        None

    RETURNS:
        str

    RAISES:
        None
    """
    # Fetching the version needs a live connection and an existing
    # version table; fall back to no version suffix on any failure.
    try:
        version = "-v{}".format(self.__get_version())
    except Exception:
        version = ""
    # Fix: previous code formatted "VantageFeatureStore({})", which does
    # not match the documented representation "FeatureStore(<repo>)-v<ver>"
    # shown in this class's examples.
    return "FeatureStore({}){}".format(self.__repo, version)
|
171
|
+
def __get_version(self):
    """
    DESCRIPTION:
        Internal method to get the FeatureStore version.

    PARAMETERS:
        None

    RETURNS:
        str

    RAISES:
        None
    """
    # Return the cached value when available; otherwise fetch it once
    # from the version table and cache it.
    if self.__version:
        return self.__version
    sql = "SELECT version FROM {}.{}".format(self.__repo, EFS_VERSION_SPEC["table_name"])
    self.__version = next(execute_sql(sql))[0]
    return self.__version
|
190
|
+
@staticmethod
def list_repos() -> DataFrame:
    """
    DESCRIPTION:
        Function to list down the repositories.

    PARAMETERS:
        None

    RETURNS:
        teradataml DataFrame

    RAISES:
        None

    EXAMPLES:
        # List down all the FeatureStore repositories.
        >>> FeatureStore.list_repos()
            repos
        0  vfs_v1
        >>>
    """
    # A database is considered a repository when it contains the
    # FeatureStore version table.
    query = ("select distinct DataBaseName as repos from dbc.tablesV "
             "where TableName='{}'").format(EFS_VERSION_SPEC["table_name"])
    return DataFrame.from_query(query)
|
215
|
+
def setup(self, perm_size='10e9', spool_size='10e8'):
    """
    DESCRIPTION:
        Function to setup all the required objects in Vantage for the specified
        repository.
        Note:
            The function checks whether repository exists or not. If not exists,
            it first creates the repository and then creates the corresponding tables.
            Hence make sure the user which is connected to Vantage
            has corresponding access rights for creating DataBase and creating
            tables in the corresponding database.

    PARAMETERS:
        perm_size:
            Optional Argument.
            Specifies the number of bytes to allocate to FeatureStore "repo"
            for permanent space.
            Note:
                Exponential notation can also be used.
            Default Value: 10e9
            Types: str or int

        spool_size:
            Optional Argument.
            Specifies the number of bytes to allocate to FeatureStore "repo"
            for spool space.
            Note:
                Exponential notation can also be used.
            Default Value: 10e8
            Types: str or int

    RETURNS:
        bool

    RAISES:
        TeradatamlException

    EXAMPLES:
        # Setup FeatureStore for repo 'vfs_v1'.
        >>> from teradataml import FeatureStore
        >>> fs = FeatureStore("vfs_v1")
        >>> fs.setup()
        True
        >>>
    """
    repo_exists = get_connection().dialect._get_database_names(
        get_connection(), self.__repo)

    # If repo does not exist, then create it.
    if not repo_exists:
        _create_database(self.__repo, perm_size, spool_size)

    # Check whether version table exists or not. If it exists, assume all
    # tables are available.
    all_tables_exist = get_connection().dialect.has_table(
        get_connection(), EFS_VERSION_SPEC['table_name'], schema=self.__repo)

    if not all_tables_exist:
        # Create the tables.
        table_specs = [EFS_FEATURES_SPEC,
                       EFS_DATA_SOURCE_SPEC,
                       EFS_ENTITY_SPEC,
                       EFS_ENTITY_XREF_SPEC,
                       EFS_FEATURE_GROUP_SPEC,
                       EFS_GROUP_FEATURES_SPEC,
                       EFS_VERSION_SPEC]

        staging_table_specs = [
            EFS_FEATURES_STAGING_SPEC,
            EFS_DATA_SOURCE_STAGING_SPEC,
            EFS_ENTITY_STAGING_SPEC,
            EFS_ENTITY_XREF_STAGING_SPEC,
            EFS_GROUP_FEATURES_STAGING_SPEC,
            EFS_FEATURE_GROUP_STAGING_SPEC
        ]

        triggers_specs = [
            EFS_FEATURES_TRG,
            EFS_GROUP_FEATURES_TRG,
            EFS_FEATURE_GROUP_TRG,
            EFS_DATA_SOURCE_TRG,
            EFS_ENTITY_TRG,
            EFS_ENTITY_XREF_TRG
        ]

        for table_spec in table_specs + staging_table_specs:
            params_ = {"table_name": table_spec["table_name"],
                       "columns": table_spec["columns"],
                       "primary_index": table_spec.get("primary_index"),
                       # A declared primary index is always unique here.
                       "unique": bool(table_spec.get("primary_index")),
                       "schema_name": self.__repo,
                       "set_table": False
                       }
            if "foreign_keys" in table_spec:
                params_["foreign_key_constraint"] = table_spec.get("foreign_keys")

            _create_table(**params_)

        for trigger_spec in triggers_specs:
            execute_sql(trigger_spec.format(schema_name=self.__repo))

        # After the setup is done, populate the version.
        insert_model = "insert into {}.{} values (?, ?);".format(self.__repo, EFS_VERSION_SPEC["table_name"])
        execute_sql(insert_model, (EFS_VERSION, datetime.datetime.now()))

    if repo_exists and all_tables_exist:
        print("EFS is already setup for the repo {}.".format(self.__repo))

    # Bug fix: the method is documented to return bool (and examples show
    # True), but it previously fell off the end and returned None.
    return True
|
324
|
+
@property
def grant(self):
    """
    DESCRIPTION:
        Grants access on FeatureStore.
        Note:
            One must have admin access to grant access.

    PARAMETERS:
        None

    RETURNS:
        bool

    RAISES:
        OperationalError

    EXAMPLES:
        >>> from teradataml import FeatureStore
        # Create FeatureStore for repo 'vfs_v1'.
        >>> fs = FeatureStore("vfs_v1")
        # Setup FeatureStore for this repository.
        >>> fs.setup()
        True

        # Example 1: Grant read access on FeatureStore to user 'BoB'.
        >>> fs.grant.read('BoB')
        True

        # Example 2: Grant write access on FeatureStore to user 'BoB'.
        >>> fs.grant.write('BoB')
        True

        # Example 3: Grant read and write access on FeatureStore to user 'BoB'.
        >>> fs.grant.read_write('BoB')
        True

    """
    # Qualify every FeatureStore table with the repo name and hand the
    # full list over to the Grant helper.
    qualified_tables = [UtilFuncs._get_qualified_table_name(self.__repo, table_name)
                        for table_name in EFS_TABLES.values()]
    return Grant(qualified_tables)
|
366
|
+
@property
def revoke(self):
    """
    DESCRIPTION:
        Revokes access on FeatureStore.
        Note:
            One must have admin access to revoke access.

    PARAMETERS:
        None

    RETURNS:
        bool

    RAISES:
        OperationalError

    EXAMPLES:
        >>> from teradataml import FeatureStore
        # Create FeatureStore for repo 'vfs_v1'.
        >>> fs = FeatureStore("vfs_v1")
        # Setup FeatureStore for this repository.
        >>> fs.setup()
        True

        # Example 1: Revoke read access on FeatureStore from user 'BoB'.
        >>> fs.revoke.read('BoB')
        True

        # Example 2: Revoke write access on FeatureStore from user 'BoB'.
        >>> fs.revoke.write('BoB')
        True

        # Example 3: Revoke read and write access on FeatureStore from user 'BoB'.
        >>> fs.revoke.read_write('BoB')
        True
    """
    # Qualify every FeatureStore table with the repo name and hand the
    # full list over to the Revoke helper.
    qualified_tables = [UtilFuncs._get_qualified_table_name(self.__repo, table_name)
                        for table_name in EFS_TABLES.values()]
    return Revoke(qualified_tables)
|
407
|
+
def repair(self):
    """
    DESCRIPTION:
        Repairs the existing repo.
        Notes:
            * The method checks for the corresponding missing database objects which are
              required for FeatureStore. If any of the database object is not available,
              then it tries to create the object.
            * The method repairs only the underlying tables and not data inside the
              corresponding table.

    PARAMETERS:
        None

    RETURNS:
        bool

    RAISES:
        TeradatamlException

    EXAMPLES:
        # Repair FeatureStore repo 'vfs_v1'.
        >>> from teradataml import FeatureStore
        >>> fs = FeatureStore("vfs_v1")
        >>> fs.repair()
        True
        >>>
    """
    # Each unit bundles (staging table spec, table spec, trigger spec,
    # user-facing object name); units are processed in this exact order.
    repair_units = (
        [EFS_GROUP_FEATURES_STAGING_SPEC, EFS_GROUP_FEATURES_SPEC, EFS_GROUP_FEATURES_TRG, "GroupFeatures"],
        [EFS_FEATURE_GROUP_STAGING_SPEC, EFS_FEATURE_GROUP_SPEC, EFS_FEATURE_GROUP_TRG, "FeatureGroup"],
        [EFS_FEATURES_STAGING_SPEC, EFS_FEATURES_SPEC, EFS_FEATURES_TRG, "Feature"],
        [EFS_ENTITY_STAGING_SPEC, EFS_ENTITY_SPEC, EFS_ENTITY_TRG, "Entity"],
        [EFS_ENTITY_XREF_STAGING_SPEC, EFS_ENTITY_XREF_SPEC, EFS_ENTITY_XREF_TRG, "EntityXref"],
        [EFS_DATA_SOURCE_STAGING_SPEC, EFS_DATA_SOURCE_SPEC, EFS_DATA_SOURCE_TRG, "DataSource"],
    )

    for staging_spec, table_spec, trigger_spec, obj_name in repair_units:
        print("Repairing objects related to {}.".format(obj_name))

        outcomes = [
            self.__try_create_table(staging_spec),
            self.__try_create_table(table_spec),
            self.__try_create_trigger(trigger_spec, "{}_trg".format(table_spec["table_name"])),
        ]

        # Report the overall state: any Bad outcome means the repair
        # failed; otherwise any Repaired outcome means something was
        # fixed; otherwise everything was already in place.
        if self.__bad_status in outcomes:
            print("Unable to repair objects related to {}.".format(obj_name))
        elif self.__repaired_status in outcomes:
            print("Successfully repaired objects related to {}.".format(obj_name))
        else:
            print("{} objects are good and do not need any repair.".format(obj_name))

    # Repair the version table. If it had to be recreated, populate the
    # version row again.
    if self.__try_create_table(EFS_VERSION_SPEC) == self.__repaired_status:
        insert_model = "insert into {}.{} values (?, ?);".format(self.__repo, EFS_VERSION_SPEC["table_name"])
        execute_sql(insert_model, (EFS_VERSION, datetime.datetime.now()))

    return True
|
474
|
+
def __try_create_table(self, table_spec):
    """
    DESCRIPTION:
        Internal function to create a table from table spec.

    PARAMETERS:
        table_spec:
            Required Argument.
            Specifies the spec for the corresponding table.
            Types: dict

    RETURNS:
        str
        Note:
            Method can return three different values of strings.
            * Good - When table to create already exists.
            * Repaired - When table is created.
            * Bad - When table not exists and method unable to create table.

    RAISES:
        None

    EXAMPLES:
        self.__try_create_table(EFS_VERSION_SPEC)
    """
    try:
        _create_table(table_spec["table_name"],
                      columns=table_spec["columns"],
                      primary_index=table_spec.get("primary_index"),
                      unique=bool(table_spec.get("primary_index")),
                      schema_name=self.__repo,
                      set_table=False)
    except Exception as err:
        # An "already exists" error means nothing needed repairing;
        # any other error means the table could not be created.
        if "Table '{}' already exists".format(table_spec["table_name"]) in str(err):
            return self.__good_status
        print(str(err))
        return self.__bad_status
    return self.__repaired_status
|
514
|
+
def __try_create_trigger(self, trigger_spec, trigger_name):
    """
    DESCRIPTION:
        Internal function to create trigger.

    PARAMETERS:
        trigger_spec:
            Required Argument.
            Specifies the spec for the corresponding trigger.
            Types: str

        trigger_name:
            Required Argument.
            Specifies the name of the trigger to create.
            Types: str

    RETURNS:
        str
        Note:
            Method can return three different values of strings.
            * Good - When trigger to create already exists.
            * Repaired - When trigger is created.
            * Bad - When trigger not exists and method unable to create trigger.

    RAISES:
        None

    EXAMPLES:
        self.__try_create_trigger(EFS_FEATURE_TRIGGER_SPEC)
    """
    try:
        execute_sql(trigger_spec.format(schema_name=self.__repo))
    except Exception as err:
        # An "already exists" error means nothing needed repairing;
        # any other error means the trigger could not be created.
        if "Trigger '{}' already exists".format(trigger_name) in str(err):
            return self.__good_status
        print("Unable to create trigger '{}'. Error - {}".format(trigger_name, str(err)))
        return self.__bad_status
    return self.__repaired_status
|
|
554
|
+
def list_features(self, archived=False) -> DataFrame:
    """
    DESCRIPTION:
        List all the features.

    PARAMETERS:
        archived:
            Optional Argument.
            Specifies whether to list effective features or archived features.
            When set to False, effective features in FeatureStore are listed,
            otherwise, archived features are listed.
            Default Value: False
            Types: bool

    RETURNS:
        teradataml DataFrame

    RAISES:
        None

    EXAMPLES:
        >>> from teradataml import DataFrame, FeatureStore, load_example_data
        >>> load_example_data('dataframe', 'sales')
        # Create FeatureStore for repo 'vfs_v1'.
        >>> fs = FeatureStore("vfs_v1")
        # Create a FeatureGroup from teradataml DataFrame and apply it.
        >>> df = DataFrame("sales")
        >>> fg = FeatureGroup.from_DataFrame(name='sales',
        ...                                  entity_columns='accounts',
        ...                                  df=df,
        ...                                  timestamp_col_name='datetime')
        >>> fs.apply(fg)
        True

        # Example 1: List all the effective Features in the repo 'vfs_v1'.
        >>> fs.list_features()

        # Example 2: List all the archived Features in the repo 'vfs_v1'.
        # Note: Feature can only be archived when it is not associated
        # with any Group.
        >>> fg.remove(fs.get_feature('Feb'))
        True
        >>> fs.apply(fg)
        True
        >>> fs.archive_feature('Feb')
        Feature 'Feb' is archived.
        True
        >>> fs.list_features(archived=True)
        >>>
    """
    # Dispatch to the staging (archived) or effective feature DataFrame.
    if archived:
        return self.__get_archived_features_df()
    return self.__get_features_df()
|
621
|
+
def list_entities(self, archived=False) -> DataFrame:
    """
    DESCRIPTION:
        List all the entities.

    PARAMETERS:
        archived:
            Optional Argument.
            Specifies whether to list effective entities or archived entities.
            When set to False, effective entities in FeatureStore are listed,
            otherwise, archived entities are listed.
            Default Value: False
            Types: bool

    RETURNS:
        teradataml DataFrame

    RAISES:
        None

    EXAMPLES:
        >>> from teradataml import DataFrame, FeatureStore, load_example_data
        >>> load_example_data('dataframe', 'sales')
        # Create FeatureStore for repo 'vfs_v1'.
        >>> fs = FeatureStore("vfs_v1")
        # Create a FeatureGroup from teradataml DataFrame and apply it.
        >>> df = DataFrame("sales")
        >>> fg = FeatureGroup.from_DataFrame(name='sales',
        ...                                  entity_columns='accounts',
        ...                                  df=df,
        ...                                  timestamp_col_name='datetime')
        >>> fs.apply(fg)
        True

        # Example 1: List all the effective Entities in the repo 'vfs_v1'.
        >>> fs.list_entities()

        # Example 2: List all the archived Entities in the repo 'vfs_v1'.
        # Note: Entity cannot be archived if it is a part of FeatureGroup.
        # Replace the FeatureGroup's entity, then archive the old one.
        >>> entity = Entity('store_sales', columns=df.accounts)
        >>> fg.apply(entity)
        >>> fs.apply(fg)
        True
        >>> fs.archive_entity('sales')
        Entity 'sales' is archived.
        True
        >>> fs.list_entities(archived=True)
        >>>
    """
    # Dispatch to the staging (archived) or effective entity DataFrame.
    if archived:
        return self.__get_archived_entity_df()
    return self.__get_entity_df()
|
689
|
+
def list_data_sources(self, archived=False) -> DataFrame:
    """
    DESCRIPTION:
        List all the Data Sources.

    PARAMETERS:
        archived:
            Optional Argument.
            Specifies whether to list effective data sources or archived
            data sources. When set to False, effective data sources in
            FeatureStore are listed, otherwise, archived data sources
            are listed.
            Default Value: False
            Types: bool

    RETURNS:
        teradataml DataFrame

    RAISES:
        None

    EXAMPLES:
        >>> from teradataml import DataSource, FeatureStore, load_example_data
        >>> load_example_data("dataframe", "admissions_train")
        # Create teradataml DataFrame.
        >>> admissions=DataFrame("admissions_train")
        # Create FeatureStore for repo 'vfs_v1'.
        >>> fs = FeatureStore("vfs_v1")
        # Create DataSource using teradataml DataFrame and apply it to FeatureStore.
        >>> ds = DataSource(name='admissions', source=admissions)
        >>> fs.apply(ds)
        True

        # Example 1: List all the effective DataSources in the repo 'vfs_v1'.
        >>> fs.list_data_sources()
                   description timestamp_col_name                            source
        name
        admissions        None               None  select * from "admissions_train"
        >>>

        # Example 2: List all the archived DataSources in the repo 'vfs_v1'.
        # Let's first archive the DataSource.
        >>> fs.archive_data_source('admissions')
        DataSource 'admissions' is archived.
        True
        # List archived DataSources.
        >>> fs.list_data_sources(archived=True)
                   description timestamp_col_name                            source               archived_time
        name
        admissions        None               None  select * from "admissions_train"  2024-09-30 12:05:39.220000
        >>>
    """
    # Archived and effective data sources are exposed through separate
    # internal readers; dispatch on the caller's request.
    if archived:
        return self.__get_archived_data_source_df()
    return self.__get_data_source_df()
|
|
742
|
+
|
|
743
|
+
def list_feature_groups(self, archived=False) -> DataFrame:
    """
    DESCRIPTION:
        List all the FeatureGroups.

    PARAMETERS:
        archived:
            Optional Argument.
            Specifies whether to list effective feature groups or archived
            feature groups. When set to False, effective feature groups in
            FeatureStore are listed, otherwise, archived feature groups
            are listed.
            Default Value: False
            Types: bool

    RETURNS:
        teradataml DataFrame

    RAISES:
        None

    EXAMPLES:
        >>> from teradataml import FeatureGroup, FeatureStore, load_example_data
        >>> load_example_data("dataframe", "admissions_train")
        # Create teradataml DataFrame.
        >>> admissions=DataFrame("admissions_train")
        # Create FeatureStore for repo 'vfs_v1'.
        >>> fs = FeatureStore("vfs_v1")
        # Create a FeatureGroup from DataFrame and apply it to FeatureStore.
        >>> fg = FeatureGroup.from_DataFrame("admissions", df=admissions, entity_columns='id')
        >>> fs.apply(fg)
        True

        # Example 1: List all the effective FeatureGroups in the repo 'vfs_v1'.
        >>> fs.list_feature_groups()
                   description data_source_name entity_name
        name
        admissions        None       admissions  admissions
        >>>

        # Example 2: List all the archived FeatureGroups in the repo 'vfs_v1'.
        # Let's first archive the FeatureGroup.
        >>> fs.archive_feature_group("admissions")
        True
        >>>
        # List archived FeatureGroups.
        >>> fs.list_feature_groups(archived=True)
                 name description data_source_name entity_name               archived_time
        0  admissions        None       admissions  admissions  2024-09-30 12:05:39.220000
        >>>
    """
    # Route to the reader matching the requested view of the repo.
    if archived:
        return self.__get_archived_feature_group_df()
    return self.__get_feature_group_df()
|
|
795
|
+
|
|
796
|
+
def get_feature(self, name):
    """
    DESCRIPTION:
        Retrieve the feature.

    PARAMETERS:
        name:
            Required Argument.
            Specifies the name of the feature to get.
            Types: str

    RETURNS:
        Feature.

    RAISES:
        TeradataMLException

    EXAMPLES:
        >>> from teradataml import DataFrame, FeatureStore, load_example_data
        # Load the sales data to Vantage and create a DataFrame on it.
        >>> load_example_data("dataframe", "sales")
        >>> df = DataFrame("sales")
        # Create Feature for column 'Mar' with name 'sales_mar' and
        # apply it to FeatureStore.
        >>> feature = Feature('sales_mar', column=df.Mar)
        >>> fs = FeatureStore("vfs_v1")
        >>> fs.apply(feature)
        True

        # Get the feature 'sales_mar' from repo 'vfs_v1'.
        >>> feature = fs.get_feature('sales_mar')
        >>> feature
        Feature(name=sales_mar)
        >>>
    """
    # Validate argument types.
    _Validators._validate_function_arguments(
        [["name", name, False, (str), True]])

    matched = self.list_features()
    matched = matched[matched.name == name]

    # An empty result means no such feature - surface a descriptive error.
    if matched.shape[0] == 0:
        msg_code = MessageCodes.FUNC_EXECUTION_FAILED
        raise TeradataMlException(
            Messages.get_message(
                msg_code, "get_feature()",
                "Feature with name '{}' does not exist.".format(name)),
            msg_code)

    return Feature._from_df(matched)
|
|
858
|
+
|
|
859
|
+
def get_group_features(self, group_name):
    """
    DESCRIPTION:
        Get the Features from the given feature group name.

    PARAMETERS:
        group_name:
            Required Argument.
            Specifies the name of the group the feature belongs to.
            Types: str

    RETURNS:
        List of Feature objects.

    RAISES:
        TeradataMLException

    EXAMPLES:
        >>> from teradataml import DataFrame, FeatureStore, load_example_data
        # Load the sales data to Vantage and create a DataFrame on it.
        >>> load_example_data("dataframe", "sales")
        >>> df = DataFrame("sales")
        # Create FeatureGroup with name 'sales' from DataFrame and
        # apply it to FeatureStore.
        >>> fg = FeatureGroup.from_DataFrame(
        ...     name="sales", df=df, entity_columns="accounts", timestamp_col_name="datetime")
        >>> fs = FeatureStore("vfs_v1")
        >>> fs.apply(fg)
        True

        # Get all the features belonging to the group 'sales' from repo 'vfs_v1'.
        >>> features = fs.get_group_features('sales')
        >>> features
        [Feature(name=Jan), Feature(name=Feb), Feature(name=Apr), Feature(name=Mar)]
        >>>
    """
    # Validate argument types.
    _Validators._validate_function_arguments(
        [["group_name", group_name, False, (str), True]])

    # Keep only active features that belong to the requested group.
    selected = self.__get_features_df()
    selected = selected[((selected.status != FeatureStatus.INACTIVE.name) & (selected.group_name == group_name))]

    # An empty result means the group has no features - raise an error.
    if selected.shape[0] == 0:
        msg_code = MessageCodes.FUNC_EXECUTION_FAILED
        raise TeradataMlException(
            Messages.get_message(
                msg_code, "get_group_features()",
                "No features found for group '{}'.".format(group_name)),
            msg_code)

    return Feature._from_df(selected)
|
|
924
|
+
|
|
925
|
+
def get_feature_group(self, name):
    """
    DESCRIPTION:
        Retrieve the FeatureGroup using name.

    PARAMETERS:
        name:
            Required Argument.
            Specifies the name of the feature group to be retrieved.
            Types: str

    RETURNS:
        Object of FeatureGroup

    RAISES:
        TeradataMLException

    EXAMPLES:
        >>> from teradataml import DataFrame, FeatureStore, load_example_data
        # Load the sales data to Vantage and create a DataFrame on it.
        >>> load_example_data("dataframe", "sales")
        >>> df = DataFrame("sales")
        # Create FeatureGroup with name 'sales' from DataFrame and
        # apply it to FeatureStore.
        >>> fg = FeatureGroup.from_DataFrame(
        ...     name="sales", df=df, entity_columns="accounts", timestamp_col_name="datetime")
        >>> fs = FeatureStore("vfs_v1")
        >>> fs.apply(fg)
        True

        # Get FeatureGroup with group name 'sales' from repo 'vfs_v1'.
        >>> fg = fs.get_feature_group('sales')
        >>> fg
        FeatureGroup(sales, features=[Feature(name=Jan), Feature(name=Feb), Feature(name=Apr), Feature(name=Mar)], entity=Entity(name=sales), data_source=DataSource(name=sales))
        >>>
    """
    # Validate argument types.
    _Validators._validate_function_arguments(
        [["name", name, False, (str), True]])

    matched = self.list_feature_groups()
    matched = matched[matched.name == name]

    # An empty result means no such group - surface a descriptive error.
    if matched.shape[0] == 0:
        msg_code = MessageCodes.FUNC_EXECUTION_FAILED
        raise TeradataMlException(
            Messages.get_message(
                msg_code, "get_feature_group()",
                "FeatureGroup with name '{}' does not exist.".format(name)),
            msg_code)

    # Materialize the group together with its features, entity and
    # data source details.
    return FeatureGroup._from_df(matched,
                                 self.__repo,
                                 self.__get_features_df(),
                                 self.__get_entity_df(),
                                 self.__get_data_source_df()
                                 )
|
|
993
|
+
|
|
994
|
+
def get_entity(self, name):
    """
    DESCRIPTION:
        Get the entity from feature store.

    PARAMETERS:
        name:
            Required Argument.
            Specifies the name of the entity.
            Types: str

    RETURNS:
        Object of Entity.

    RAISES:
        None

    EXAMPLES:
        >>> from teradataml import DataFrame, Entity, FeatureStore, load_example_data
        # Load the admissions data to Vantage and create a DataFrame on it.
        >>> load_example_data("dataframe", "admissions_train")
        >>> df = DataFrame("admissions_train")
        # Create Entity for column 'id' with name 'admissions_id' and
        # apply it to FeatureStore 'vfs_v1'.
        >>> entity = Entity(name='admissions_id', description="Entity for admissions", columns=df.id)
        >>> fs = FeatureStore('vfs_v1')
        >>> fs.apply(entity)
        True
        >>>

        # Get the Entity 'admissions_id' from repo 'vfs_v1'
        >>> entity = fs.get_entity('admissions_id')
        >>> entity
        Entity(name=admissions_id)
        >>>
    """
    # Validate argument types.
    _Validators._validate_function_arguments(
        [["name", name, False, (str), True]])

    matched = self.__get_entity_df()
    matched = matched[matched.name == name]

    # An empty result means no such entity - surface a descriptive error.
    if matched.shape[0] == 0:
        msg_code = MessageCodes.FUNC_EXECUTION_FAILED
        raise TeradataMlException(
            Messages.get_message(
                msg_code, "get_entity()",
                "Entity with name '{}' does not exist.".format(name)),
            msg_code)
    return Entity._from_df(matched)
|
|
1061
|
+
|
|
1062
|
+
def get_data_source(self, name):
    """
    DESCRIPTION:
        Get the data source from feature store.

    PARAMETERS:
        name:
            Required Argument.
            Specifies the name of the data source.
            Types: str

    RETURNS:
        Object of DataSource.

    RAISES:
        TeradataMLException

    EXAMPLES:
        >>> from teradataml import DataFrame, DataSource, FeatureStore, load_example_data
        # Load the admissions data to Vantage and create a DataFrame on it.
        >>> load_example_data("dataframe", "admissions_train")
        >>> df = DataFrame("admissions_train")
        # Create DataSource using DataFrame 'df' with name 'admissions'
        # and apply it to FeatureStore 'vfs_v1'.
        >>> ds = DataSource('admissions', source=df)
        >>> fs = FeatureStore('vfs_v1')
        >>> fs.apply(ds)
        True
        >>>

        # Get the DataSource 'admissions' from repo 'vfs_v1'
        >>> ds = fs.get_data_source('admissions')
        >>> ds
        DataSource(name=admissions)
        >>>
    """
    # Validate argument types.
    _Validators._validate_function_arguments(
        [["name", name, False, (str), True]])

    matched = self.__get_data_source_df()
    matched = matched[matched.name == name]

    # An empty result means no such data source - surface a descriptive error.
    if matched.shape[0] == 0:
        msg_code = MessageCodes.FUNC_EXECUTION_FAILED
        raise TeradataMlException(
            Messages.get_message(
                msg_code, "get_data_source()",
                "DataSource with name '{}' does not exist.".format(name)),
            msg_code)

    return DataSource._from_df(matched)
|
|
1130
|
+
|
|
1131
|
+
def set_features_inactive(self, names):
    """
    DESCRIPTION:
        Mark the feature status as 'inactive'. Note that, inactive features
        are not available for any further processing. Set the status as
        'active' with "set_features_active()" method.

    PARAMETERS:
        names:
            Required Argument.
            Specifies the name(s) of the feature(s).
            Types: str OR list of str

    RETURNS:
        bool

    RAISES:
        teradataMLException

    EXAMPLES:
        >>> from teradataml import DataFrame, DataSource, FeatureStore, load_example_data
        # Load the admissions data to Vantage and create a DataFrame on it.
        >>> load_example_data("dataframe", "admissions_train")
        >>> df = DataFrame("admissions_train")
        # Create FeatureGroup from DataFrame df and apply it to
        # FeatureStore 'vfs_v1'.
        >>> fg = FeatureGroup.from_DataFrame(name='admissions', df=df, entity_columns='id')
        >>> fs = FeatureStore('vfs_v1')
        >>> fs.apply(fg)
        True
        # Get FeatureGroup 'admissions' from FeatureStore.
        >>> fg = fs.get_feature_group('admissions')
        >>> fg
        FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=programming), Feature(name=admitted), Feature(name=stats), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))

        # Set the Feature 'programming' inactive.
        >>> fs.set_features_inactive('programming')
        True
        # Get FeatureGroup again after setting feature inactive.
        >>> fg = fs.get_feature_group('admissions')
        >>> fg
        FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=stats), Feature(name=admitted), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
        >>>
    """
    # Delegate to the shared worker that flips the feature status flag.
    result = self.__set_active_inactive_features(names, active=False)
    return result
|
|
1191
|
+
|
|
1192
|
+
def set_features_active(self, names):
    """
    DESCRIPTION:
        Mark the feature status as active. Set the status as 'inactive' with
        "set_features_inactive()" method. Note that, inactive features are
        not available for any further processing.

    PARAMETERS:
        names:
            Required Argument.
            Specifies the name(s) of the feature(s).
            Types: str OR list of str

    RETURNS:
        bool

    RAISES:
        teradataMLException

    EXAMPLES:
        >>> from teradataml import DataFrame, DataSource, FeatureStore, load_example_data
        # Load the admissions data to Vantage and create a DataFrame on it.
        >>> load_example_data("dataframe", "admissions_train")
        >>> df = DataFrame("admissions_train")
        # Create FeatureGroup from DataFrame df and apply it to
        # FeatureStore 'vfs_v1'.
        >>> fg = FeatureGroup.from_DataFrame(name='admissions', df=df, entity_columns='id')
        >>> fs = FeatureStore('vfs_v1')
        >>> fs.apply(fg)
        True
        # Set the Feature 'programming' inactive.
        >>> fs.set_features_inactive('programming')
        True
        # Get FeatureGroup after setting feature inactive.
        >>> fg = fs.get_feature_group('admissions')
        >>> fg
        FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=stats), Feature(name=admitted), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
        >>>

        # Mark Feature 'programming' from 'inactive' to 'active'.
        >>> fs.set_features_active('programming')
        # Get FeatureGroup again after setting feature active.
        >>> fg = fs.get_feature_group('admissions')
        >>> fg
        FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=programming), Feature(name=admitted), Feature(name=stats), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
        >>>
    """
    # Delegate to the shared worker that flips the feature status flag.
    result = self.__set_active_inactive_features(names, active=True)
    return result
|
|
1259
|
+
|
|
1260
|
+
def __set_active_inactive_features(self, names, active):
    """
    DESCRIPTION:
        Internal function to either active or inactive features.

    PARAMETERS:
        names:
            Required Argument.
            Specifies the name(s) of the feature(s).
            Types: str OR list of str

        active:
            Required Argument.
            Specifies whether to mark the features as active or inactive.
            When set to True, features are marked active, otherwise
            inactive.
            Types: bool

    RETURNS:
        bool

    RAISES:
        teradataMLException

    EXAMPLES:
        # Example 1: Mark the feature 'feature1' inactive in the repo
        #            'vfs_v1'.
        >>> from teradataml import FeatureStore
        >>> fs = FeatureStore('vfs_v1')
        >>> fs.__set_active_inactive_features('feature1', active=False)
        True
        >>>
    """
    # Normalize a single feature name to a list so the UPDATE condition
    # below always works with a list.
    names = UtilFuncs._as_list(names)

    argument_validation_params = []
    argument_validation_params.append(["names", names, False, (str, list), True])

    # Validate argument types
    _Validators._validate_function_arguments(argument_validation_params)

    status = FeatureStatus.ACTIVE.name if active else FeatureStatus.INACTIVE.name

    # Flip the status column for all matching feature rows in the repo.
    _update_data(table_name=EFS_FEATURES_SPEC["table_name"],
                 schema_name=self.__repo,
                 update_columns_values={"status": status},
                 update_conditions={"name": names}
                 )
    return True
|
|
1302
|
+
|
|
1303
|
+
def apply(self, object):
    """
    DESCRIPTION:
        Register objects to repository.

    PARAMETERS:
        object:
            Required Argument.
            Specifies the object to update the repository.
            Types: Feature OR DataSource OR Entity OR FeatureGroup.

    RETURNS:
        bool.

    RAISES:
        TeradataMLException

    EXAMPLES:
        >>> load_example_data('dataframe', ['sales'])
        >>> df = DataFrame("sales")

        # Example 1: create a Feature for column 'Feb' from 'sales' DataFrame
        #            and register with repo 'vfs_v1'.
        >>> # Create Feature.
        >>> from teradataml import Feature
        >>> feature = Feature('sales:Feb', df.Feb)
        >>> # Register the above Feature with repo.
        >>> fs = FeatureStore('vfs_v1')
        >>> fs.apply(feature)
        True
        >>>

        # Example 2: create Entity for 'sales' DataFrame and register
        #            with repo 'vfs_v1'.
        >>> # Create Entity.
        >>> from teradataml import Entity
        >>> entity = Entity('sales:accounts', df.accounts)
        >>> # Register the above Entity with repo.
        >>> fs = FeatureStore('vfs_v1')
        >>> fs.apply(entity)
        True
        >>>

        # Example 3: create DataSource for 'sales' DataFrame and register
        #            with repo 'vfs_v1'.
        >>> # Create DataSource.
        >>> from teradataml import DataSource
        >>> ds = DataSource('Sales_Data', df)
        >>> # Register the above DataSource with repo.
        >>> fs = FeatureStore('vfs_v1')
        >>> fs.apply(ds)
        True
        >>>

        # Example 4: create FeatureGroup with all the objects
        #            created in above examples and register with
        #            repo 'vfs_v1'.
        >>> # Create FeatureGroup.
        >>> from teradataml import FeatureGroup
        >>> fg = FeatureGroup('Sales',
        ...                   features=feature,
        ...                   entity=entity,
        ...                   data_source=ds)
        >>> # Register the above FeatureGroup with repo.
        >>> fs = FeatureStore('vfs_v1')
        >>> fs.apply(fg)
        True
        >>>
    """
    argument_validation_params = []
    # Label the validation entry with the actual argument name ("object",
    # not "name") so a type error reports the right argument to the user.
    argument_validation_params.append(["object", object, False, (Feature, Entity, DataSource, FeatureGroup)])

    # Validate argument types
    _Validators._validate_function_arguments(argument_validation_params)

    # Every supported object knows how to persist itself to a repo.
    return object.publish(self.__repo)
|
|
1378
|
+
|
|
1379
|
+
def get_dataset(self, group_name):
    """
    DESCRIPTION:
        Returns teradataml DataFrame based on "group_name".

    PARAMETERS:
        group_name:
            Required Argument.
            Specifies the name of the feature group.
            Types: str

    RETURNS:
        teradataml DataFrame.

    RAISES:
        TeradataMLException

    EXAMPLES:
        >>> from teradataml import DataFrame, FeatureStore, load_example_data
        # Load the sales data to Vantage and create a DataFrame on it.
        >>> load_example_data("dataframe", "sales")
        >>> df = DataFrame("sales")
        # Create FeatureGroup with name 'sales' from DataFrame and
        # apply it to FeatureStore.
        >>> fg = FeatureGroup.from_DataFrame(
        ...     name="sales", df=df, entity_columns="accounts", timestamp_col_name="datetime")
        >>> fs = FeatureStore("vfs_v1")
        >>> fs.apply(fg)
        True

        # Get the DataSet for FeatureGroup 'sales'
        >>> df = fs.get_dataset('sales')
        >>> df
                      datetime    Jan    Feb    Apr    Mar
        accounts
        Orange Inc  04/01/2017    NaN  210.0  250.0    NaN
        Jones LLC   04/01/2017  150.0  200.0  180.0  140.0
        Blue Inc    04/01/2017   50.0   90.0  101.0   95.0
        Alpha Co    04/01/2017  200.0  210.0  250.0  215.0
        Yellow Inc  04/01/2017    NaN   90.0    NaN    NaN
        >>>
    """
    # Resolve the FeatureGroup and pull out its constituent pieces.
    fg = self.get_feature_group(group_name)
    data_source = fg.data_source

    # Only active features participate in the dataset.
    feature_columns = [f.column_name for f in fg.features
                       if f.status != FeatureStatus.INACTIVE]

    # Build a DataFrame from the group's source query.
    frame = DataFrame.from_query(data_source.source)

    # Project entity columns first, then (optionally) the timestamp
    # column, then the feature columns.
    ts_column = data_source.timestamp_col_name
    if ts_column:
        # Avoid listing the timestamp column twice if it is also a feature.
        feature_columns = [col for col in feature_columns if col != ts_column]
        selected = fg.entity.columns + [ts_column] + feature_columns
    else:
        selected = fg.entity.columns + feature_columns
    return frame.select(selected)
|
|
1448
|
+
|
|
1449
|
+
def __get_feature_group_names(self, name, type_):
    """
    DESCRIPTION:
        Internal function to get the associated group names for
        Feature or DataSource OR Entity.

    PARAMETERS:
        name:
            Required Argument.
            Specifies the name of the Feature or DataSource or Entity.
            Types: str

        type_:
            Required Argument.
            Specifies the type of the objects stored in feature store.
            Permitted Values:
                * feature
                * data_source
                * entity
            Types: str

    RETURNS:
        list

    RAISES:
        None

    EXAMPLES:
        >>> self.__get_feature_group_names('admissions', 'data_source')
    """
    if type_ == "feature":
        # Group names are recorded on the feature rows themselves;
        # skip features not attached to any group.
        features = self.__get_features_df()
        matched = features[features.name == name]
        return [row.group_name for row in matched.itertuples()
                if row.group_name is not None]
    elif type_ in ("data_source", "entity"):
        # Both lookups scan the feature-group table, keyed on the
        # corresponding reference column.
        groups = self.__get_feature_group_df()
        key = (groups.data_source_name if type_ == "data_source"
               else groups.entity_name)
        return [row.name for row in groups[key == name].itertuples()]
    # NOTE(review): any other "type_" silently falls through and returns
    # None; callers are expected to pass only the permitted values.
|
|
1488
|
+
|
|
1489
|
+
def __remove_obj(self, name, type_, action="archive"):
    """
    DESCRIPTION:
        Internal function to remove a Feature or DataSource or Entity
        from the repo.

    PARAMETERS:
        name:
            Required Argument.
            Specifies the name of the Feature or DataSource or Entity,
            or the corresponding object itself.
            Types: str OR Feature OR DataSource OR Entity

        type_:
            Required Argument.
            Specifies the type of "name".
            Permitted Values:
                * feature
                * data_source
                * entity
            Types: str

        action:
            Optional Argument.
            Specifies the action to perform.
            When set to "archive", the object is removed from the regular
            tables. When set to "delete", the object is removed from the
            staging tables.
            Default Value: "archive"
            Permitted Values: "archive", "delete"
            Types: str

    RETURNS:
        bool

    RAISES:
        TeradataMlException - when archiving an object that is still
        associated with one or more FeatureGroups.

    EXAMPLES:
        >>> self.__remove_obj('admissions', 'data_source')
    """
    # Per-type lookup: the class used for validation/name extraction and
    # the remediation hint shown when the object is still in use.
    _vars = {
        "data_source": {"class": DataSource, "error_msg": "Update these FeatureGroups with other DataSources"},
        "entity": {"class": Entity, "error_msg": "Update these FeatureGroups with other Entities"},
        "feature": {"class": Feature, "error_msg": "Remove the Feature from FeatureGroup"},
    }
    c_name_ = _vars[type_]["class"].__name__
    argument_validation_params = []
    argument_validation_params.append([type_, name, False, (str, _vars[type_]["class"]), True])

    # Validate argument types.
    _Validators._validate_function_arguments(argument_validation_params)
    # Extract the name if argument is class type.
    if isinstance(name, _vars[type_]["class"]):
        name = name.name

    # Before removing it, check if it is associated with any FeatureGroup.
    # If yes, raise error. Applicable only for Archive.
    if action == "archive":
        feature_groups = self.__get_feature_group_names(name, type_)
        if feature_groups:
            feature_groups = ", ".join(("'{}'".format(fg) for fg in feature_groups))
            message = ("{} '{}' is associated with FeatureGroups {}. {} and try deleting again.".format(
                c_name_, name, feature_groups, _vars[type_]["error_msg"]))
            raise TeradataMlException(Messages.get_message(
                MessageCodes.FUNC_EXECUTION_FAILED, '{}_{}'.format(action, type_), message),
                MessageCodes.FUNC_EXECUTION_FAILED)

    # Entities need a dedicated transactional removal (entity + xref rows).
    if type_ == "entity":
        res = self._remove_entity(name, action)
    else:
        table_name = self.__table_names[type_]
        # "delete" operates on the staging (archive) table instead.
        if action == "delete":
            table_name = self.__table_names["{}_staging".format(type_)]

        res = _delete_data(table_name=table_name,
                           schema_name=self.__repo,
                           delete_conditions=(Col("name") == name)
                           )

    # res == 1 indicates the row was found and removed — presumably the
    # rows-affected count from _delete_data; verify against its definition.
    if res == 1:
        print("{} '{}' is {}d.".format(c_name_, name, action))
        return True
    else:
        print("{} '{}' does not exist to {}.".format(c_name_, name, action))
        return False
|
|
1572
|
+
|
|
1573
|
+
@db_transaction
def _remove_entity(self, name, action):
    """
    DESCRIPTION:
        Internal function to remove an Entity from the repo. The Entity's
        xref rows are removed first, then the Entity record itself; both
        deletes run under one transaction.

    PARAMETERS:
        name:
            Required Argument.
            Specifies the name of the Entity.
            Types: str

        action:
            Required Argument.
            Specifies which tables to remove the Entity from.
            When set to "delete", the Entity is removed from the staging
            tables. Otherwise, it is removed from the regular tables.
            Types: str

    RETURNS:
        bool

    RAISES:
        None

    EXAMPLES:
        >>> self._remove_entity('admissions', 'delete')
    """
    # Staging tables hold archived Entities; regular tables hold active ones.
    use_staging = action == "delete"
    entity_table = self.__table_names["entity_staging" if use_staging else "entity"]
    xref_table = self.__table_names["entity_staging_xref" if use_staging else "entity_xref"]

    # Remove the xref rows first so the Entity record can be removed next.
    _delete_data(table_name=xref_table,
                 schema_name=self.__repo,
                 delete_conditions=(Col("entity_name") == name)
                 )

    # Remove the Entity record and report that result to the caller.
    return _delete_data(table_name=entity_table,
                        schema_name=self.__repo,
                        delete_conditions=(Col("name") == name)
                        )
|
|
1620
|
+
|
|
1621
|
+
def archive_data_source(self, data_source):
    """
    DESCRIPTION:
        Archives a DataSource from the repository. An archived DataSource
        is not available for any further processing; it can still be
        viewed using the "list_archived_data_sources()" method.

    PARAMETERS:
        data_source:
            Required Argument.
            Specifies either the name of the DataSource or a DataSource
            object to archive from the repository.
            Types: str OR DataSource

    RETURNS:
        bool

    RAISES:
        TeradataMlException, TypeError, ValueError

    EXAMPLES:
        >>> from teradataml import DataSource, FeatureStore
        # Create a DataSource using a SELECT statement and apply it to
        # the FeatureStore for repo 'vfs_v1'.
        >>> ds = DataSource(name="sales_data", source="select * from sales")
        >>> fs = FeatureStore("vfs_v1")
        >>> fs.apply(ds)
        True
        # Archive DataSource with name "sales_data".
        >>> fs.archive_data_source("sales_data")
        DataSource 'sales_data' is archived.
        True
        # After archiving, "sales_data" no longer shows up in
        # fs.list_data_sources().
    """
    # Archiving is the default action of the shared removal helper.
    return self.__remove_obj(name=data_source, type_="data_source")
|
|
1668
|
+
|
|
1669
|
+
def delete_data_source(self, data_source):
    """
    DESCRIPTION:
        Removes an archived DataSource from the repository permanently.
        The DataSource must have been archived first with
        "archive_data_source()".

    PARAMETERS:
        data_source:
            Required Argument.
            Specifies either the name of the DataSource or a DataSource
            object to remove from the repository.
            Types: str OR DataSource

    RETURNS:
        bool.

    RAISES:
        TeradataMlException, TypeError, ValueError

    EXAMPLES:
        >>> from teradataml import DataFrame, DataSource, FeatureStore, load_example_data
        >>> load_example_data('dataframe', ['sales'])
        >>> df = DataFrame("sales")
        >>> ds = DataSource(name="sales_data", source=df)
        >>> fs = FeatureStore("vfs_v1")
        >>> fs.apply(ds)
        True
        # Archive first, then delete the DataSource with name "sales_data".
        >>> fs.archive_data_source("sales_data")
        DataSource 'sales_data' is archived.
        True
        >>> fs.delete_data_source("sales_data")
        DataSource 'sales_data' is deleted.
        True
        >>>
    """
    # "delete" targets the staging tables holding archived DataSources.
    return self.__remove_obj(name=data_source, type_="data_source", action="delete")
|
|
1711
|
+
|
|
1712
|
+
def archive_feature(self, feature):
    """
    DESCRIPTION:
        Archives a Feature from the repository. An archived Feature is
        not available for any further processing; it can still be viewed
        using the "list_archived_features()" method.

    PARAMETERS:
        feature:
            Required Argument.
            Specifies either the name of the Feature or a Feature object
            to archive from the repository.
            Types: str OR Feature

    RETURNS:
        bool

    RAISES:
        TeradataMlException, TypeError, ValueError

    EXAMPLES:
        >>> from teradataml import DataFrame, Feature, FeatureStore
        >>> load_example_data('dataframe', ['sales'])
        >>> df = DataFrame("sales")
        # Create a Feature for column 'Feb' and apply it to the
        # FeatureStore for repo 'staging_repo'.
        >>> feature = Feature(name="sales_data_Feb", column=df.Feb)
        >>> fs = FeatureStore("staging_repo")
        >>> fs.apply(feature)
        True
        # Archive Feature with name "sales_data_Feb".
        >>> fs.archive_feature(feature=feature)
        Feature 'sales_data_Feb' is archived.
        True
        # After archiving, the Feature no longer shows up in
        # fs.list_features().
    """
    # Archiving is the default action of the shared removal helper.
    return self.__remove_obj(name=feature, type_="feature")
|
|
1762
|
+
|
|
1763
|
+
def delete(self):
    """
    DESCRIPTION:
        Removes the FeatureStore and its components from the repository.
        Notes:
            * All the associated database objects are removed along with
              their data. Be cautious while using this function.
            * Once all the Feature Store objects are removed, the
              function also tries to drop the underlying database.
            * The user must have permission on the database used by this
              Feature Store
                * to drop triggers.
                * to drop the tables.
                * to drop the database.
            * If the user lacks any of the mentioned permissions,
              Teradata recommends to not use this function.

    PARAMETERS:
        None

    RETURNS:
        bool.

    RAISES:
        None

    EXAMPLES:
        # Setup FeatureStore for repo 'vfs_v1', then delete it.
        >>> from teradataml import FeatureStore
        >>> fs = FeatureStore("vfs_v1")
        >>> fs.setup()
        True
        >>> fs.delete()
        True
        >>>
    """
    # Ask for an explicit interactive confirmation before dropping anything.
    answer = input("The function removes Feature Store and drops the "
                   "corresponding repo also. Are you sure you want to proceed? (Y/N): ")

    # Anything other than an explicit yes leaves the repo untouched.
    if answer not in ("Y", "y"):
        return False

    return self.__drop_feature_store_objects(self.__repo)
|
|
1806
|
+
|
|
1807
|
+
@staticmethod
def __drop_feature_store_objects(repo_name):
    """
    DESCRIPTION:
        Removes the FeatureStore and its components from the repository:
        drops all triggers, all regular/staging/version tables, and
        finally the repository database itself.

    PARAMETERS:
        repo_name:
            Required Argument.
            Specifies the name of the repository.
            Types: str

    RETURNS:
        bool
    """
    # Regular Feature Store tables. NOTE(review): the ordering here is
    # preserved from the original — presumably it matters for dependent
    # objects; confirm before reordering.
    core_tables = [
        EFS_GROUP_FEATURES_SPEC["table_name"],
        EFS_FEATURE_GROUP_SPEC["table_name"],
        EFS_FEATURES_SPEC['table_name'],
        EFS_ENTITY_XREF_SPEC['table_name'],
        EFS_ENTITY_SPEC["table_name"],
        EFS_DATA_SOURCE_SPEC["table_name"]
    ]

    # Staging tables that hold archived objects.
    staging_tables = [
        EFS_FEATURES_STAGING_SPEC['table_name'],
        EFS_ENTITY_STAGING_SPEC["table_name"],
        EFS_ENTITY_XREF_STAGING_SPEC["table_name"],
        EFS_DATA_SOURCE_STAGING_SPEC["table_name"],
        EFS_FEATURE_GROUP_STAGING_SPEC["table_name"],
        EFS_GROUP_FEATURES_STAGING_SPEC["table_name"]
    ]

    # Triggers must be dropped first so the tables can be dropped.
    for core_table in core_tables:
        execute_sql("drop trigger {}.{}".format(repo_name, "{}_trg".format(core_table)))

    # Drop every table, including the version table.
    for table_name in (core_tables + [EFS_VERSION_SPEC["table_name"]] + staging_tables):
        db_drop_table(table_name, schema_name=repo_name)

    # Finally, drop the (now empty) repository database.
    execute_sql("DROP DATABASE {}".format(repo_name))

    return True
|
|
1852
|
+
|
|
1853
|
+
def delete_feature(self, feature):
    """
    DESCRIPTION:
        Removes an archived Feature from the repository permanently.
        The Feature must have been archived first with
        "archive_feature()".

    PARAMETERS:
        feature:
            Required Argument.
            Specifies either the name of the Feature or a Feature object
            to remove from the repository.
            Types: str OR Feature

    RETURNS:
        bool.

    RAISES:
        TeradataMlException, TypeError, ValueError

    EXAMPLES:
        >>> from teradataml import DataFrame, Feature, FeatureStore
        >>> load_example_data('dataframe', ['sales'])
        >>> df = DataFrame("sales")
        # Create a Feature for column 'Feb' and apply it to the
        # FeatureStore with the repo name "staging_repo".
        >>> feature = Feature(name="sales_data_Feb", column=df.Feb)
        >>> fs = FeatureStore("staging_repo")
        >>> fs.apply(feature)
        True
        # Archive first, then delete the Feature "sales_data_Feb".
        >>> fs.archive_feature(feature=feature)
        Feature 'sales_data_Feb' is archived.
        True
        >>> fs.delete_feature(feature=feature)
        Feature 'sales_data_Feb' is deleted.
        True
        >>>
    """
    # "delete" targets the staging tables holding archived Features.
    return self.__remove_obj(name=feature, type_="feature", action="delete")
|
|
1895
|
+
|
|
1896
|
+
def archive_entity(self, entity):
    """
    DESCRIPTION:
        Archives an Entity from the repository. An archived Entity is
        not available for any further processing; it can still be viewed
        using the "list_archived_entities()" method.

    PARAMETERS:
        entity:
            Required Argument.
            Specifies either the name of the Entity or an Entity object
            to remove from the repository.
            Types: str OR Entity

    RETURNS:
        bool.

    RAISES:
        TeradataMlException, TypeError, ValueError

    EXAMPLES:
        >>> from teradataml import DataFrame, Entity, FeatureStore
        >>> load_example_data('dataframe', ['sales'])
        >>> df = DataFrame("sales")
        # Create an Entity from a DataFrame column and apply it to the
        # FeatureStore for repo 'staging_repo'.
        >>> entity = Entity(name="sales_data", columns=df.accounts)
        >>> fs = FeatureStore("staging_repo")
        >>> fs.apply(entity)
        True
        # Archive Entity with name "sales_data".
        >>> fs.archive_entity(entity=entity.name)
        Entity 'sales_data' is archived.
        True
        # After archiving, the Entity no longer shows up in
        # fs.list_entities().
    """
    # Archiving is the default action of the shared removal helper.
    return self.__remove_obj(name=entity, type_="entity")
|
|
1945
|
+
|
|
1946
|
+
def delete_entity(self, entity):
    """
    DESCRIPTION:
        Removes an archived Entity from the repository permanently.
        The Entity must have been archived first with "archive_entity()".

    PARAMETERS:
        entity:
            Required Argument.
            Specifies either the name of the Entity or an Entity object
            to delete from the repository.
            Types: str OR Entity

    RETURNS:
        bool.

    RAISES:
        TeradataMlException, TypeError, ValueError

    EXAMPLES:
        >>> from teradataml import DataFrame, Entity, FeatureStore
        >>> load_example_data('dataframe', ['sales'])
        >>> df = DataFrame("sales")
        # Create an Entity from a DataFrame column and apply it to the
        # FeatureStore for repo 'staging_repo'.
        >>> entity = Entity(name="sales_data", columns=df.accounts)
        >>> fs = FeatureStore("staging_repo")
        >>> fs.apply(entity)
        True
        # Archive first, then delete the Entity with name "sales_data".
        >>> fs.archive_entity(entity=entity.name)
        Entity 'sales_data' is archived.
        True
        >>> fs.delete_entity(entity=entity.name)
        Entity 'sales_data' is deleted.
        True
        >>>
    """
    # "delete" targets the staging tables holding archived Entities.
    return self.__remove_obj(name=entity, type_="entity", action="delete")
|
|
1988
|
+
|
|
1989
|
+
def __get_features_where_clause(self, features):
    """
    Internal function to build a filter expression on the features df,
    OR-ing an equality check on the "name" column for every feature name.
    """
    # Build one equality check per feature name, then fold them with OR.
    name_checks = [Col("name") == feature_name for feature_name in features]
    combined_expr = name_checks[0]
    for check in name_checks[1:]:
        combined_expr = combined_expr | check

    return combined_expr
|
|
1998
|
+
|
|
1999
|
+
def archive_feature_group(self, feature_group):
    """
    DESCRIPTION:
        Archives FeatureGroup from repository. Note that archived FeatureGroup
        is not available for any further processing. Archived FeatureGroup can be
        viewed using "list_archived_feature_groups()" method.
        Note:
            The function archives the associated Features, Entity and DataSource
            if they are not associated with any other FeatureGroups.

    PARAMETERS:
        feature_group:
            Required Argument.
            Specifies either the name of FeatureGroup or Object of FeatureGroup
            to archive from repository.
            Types: str OR FeatureGroup

    RETURNS:
        bool.

    RAISES:
        TeradataMLException, TypeError, ValueError

    EXAMPLES:
        >>> from teradataml import DataFrame, FeatureGroup, FeatureStore
        >>> load_example_data('dataframe', ['sales'])
        # Create FeatureGroup from teradataml DataFrame and apply it to
        # the FeatureStore for the repo 'staging_repo'.
        >>> df = DataFrame("sales")
        >>> fg = FeatureGroup.from_DataFrame(name="sales", entity_columns="accounts", df=df, timestamp_col_name="datetime")
        >>> fs = FeatureStore("staging_repo")
        >>> fs.apply(fg)
        True
        # Archive FeatureGroup with name "sales".
        >>> fs.archive_feature_group(feature_group='sales')
        FeatureGroup 'sales' is archived.
        True
        >>>
    """
    argument_validation_params = []
    argument_validation_params.append(["feature_group", feature_group, False, (str, FeatureGroup), True])

    # Validate argument types
    _Validators._validate_function_arguments(argument_validation_params)

    feature_group_name = feature_group if isinstance(feature_group, str) else feature_group.name

    # Materialize the FeatureGroup object when only a name was given.
    fg = self.get_feature_group(feature_group_name) if isinstance(feature_group, str) else feature_group

    fg_df = self.list_feature_groups()

    # Find out shared Features. Extract the features which are mapped to
    # other groups. They can not be deleted.
    feature_names = [f.name for f in fg.features]
    features_df = self.list_features()
    col_expr = self.__get_features_where_clause(feature_names)
    features_df = features_df[((features_df.group_name != fg.name) & (col_expr))]
    shared_features = [f.name for f in features_df.drop_duplicate('name').itertuples()]
    feature_names_to_remove = [f for f in feature_names if f not in shared_features]

    # Find out shared Entities. If entity is not shared with any other
    # group, mark it for archiving by setting 'entity_name'.
    entity_name = None
    ent = fg_df[((fg_df.entity_name == fg.entity.name) & (fg_df.name != fg.name))]
    recs = ent.shape[0]
    if recs == 0:
        entity_name = fg.entity.name

    # Find out shared DataSources. If datasource is not shared with any
    # other group, mark it for archiving by setting 'data_source_name'.
    data_source_name = None
    ds_df = fg_df[((fg_df.data_source_name == fg.data_source.name) & (fg_df.name != fg.name))]
    recs = ds_df.shape[0]
    if recs == 0:
        data_source_name = fg.data_source.name

    # Perform the actual removal inside a single transaction.
    res = self._archive_feature_group(fg.name, feature_names_to_remove, entity_name, data_source_name)

    if res == 1:
        print("FeatureGroup '{}' is archived.".format(feature_group_name))
        return True

    # Fix: message was "not exist to archive" — corrected for grammar and
    # consistency with the other removal messages ("does not exist to ...").
    print("FeatureGroup '{}' does not exist to archive.".format(feature_group_name))
    return False
|
|
2095
|
+
|
|
2096
|
+
@db_transaction
def _archive_feature_group(self, group_name, feature_names, entity_name, data_source_name):
    """
    DESCRIPTION:
        Internal method to archive FeatureGroup from repository. All deletes
        run under a single transaction (see @db_transaction).

    PARAMETERS:
        group_name:
            Required Argument.
            Specifies the name of FeatureGroup to archive from repository.
            Types: str

        feature_names:
            Required Argument.
            Specifies the name of Features to archive from repository.
            An empty list skips Feature removal.
            Types: list

        entity_name:
            Required Argument.
            Specifies the name of Entity to archive from repository.
            None skips Entity removal (Entity shared with other groups).
            Types: str

        data_source_name:
            Required Argument.
            Specifies the name of DataSource to archive from repository.
            None skips DataSource removal (DataSource shared with other groups).
            Types: str

    RETURNS:
        bool.

    RAISES:
        OperationalError

    EXAMPLES:
        >>> self._archive_feature_group("group1", ["feature1"], "entity_name", None)
    """
    # Remove data for FeatureGroup from Xref table.
    # This allows to remove data from other tables.
    # NOTE(review): this result is immediately overwritten below; only the
    # FeatureGroup delete's result is reported to the caller.
    res = _delete_data(schema_name=self.__repo,
                       table_name=EFS_GROUP_FEATURES_SPEC["table_name"],
                       delete_conditions=(Col("group_name") == group_name)
                       )

    # Remove FeatureGroup.
    res = _delete_data(schema_name=self.__repo,
                       table_name=EFS_FEATURE_GROUP_SPEC["table_name"],
                       delete_conditions=(Col("name") == group_name)
                       )

    # Remove Features.
    if feature_names:
        _delete_data(schema_name=self.__repo,
                     table_name=EFS_FEATURES_SPEC["table_name"],
                     delete_conditions=self.__get_features_where_clause(feature_names)
                     )

    # Remove entities. Xref rows go first, then the entity record.
    if entity_name:
        _delete_data(schema_name=self.__repo,
                     table_name=EFS_ENTITY_XREF_SPEC["table_name"],
                     delete_conditions=(Col("entity_name") == entity_name)
                     )

        _delete_data(schema_name=self.__repo,
                     table_name=EFS_ENTITY_SPEC["table_name"],
                     delete_conditions=(Col("name") == entity_name)
                     )

    # Remove DataSource.
    if data_source_name:
        _delete_data(schema_name=self.__repo,
                     table_name=EFS_DATA_SOURCE_SPEC["table_name"],
                     delete_conditions=(Col("name") == data_source_name),
                     )

    return res
|
|
2172
|
+
|
|
2173
|
+
@db_transaction
def delete_feature_group(self, feature_group):
    """
    DESCRIPTION:
        Removes archived FeatureGroup from repository.
        Note:
            Unlike 'archive_feature_group()', this function does not delete the
            associated Features, Entity and DataSource. One should delete those
            using 'delete_feature()', 'delete_entity()' and 'delete_data_source()'.

    PARAMETERS:
        feature_group:
            Required Argument.
            Specifies either the name of FeatureGroup or Object of FeatureGroup
            to delete from repository.
            Types: str OR FeatureGroup

    RETURNS:
        bool

    RAISES:
        TeradataMLException, TypeError, ValueError

    EXAMPLES:
        >>> from teradataml import DataFrame, FeatureGroup, FeatureStore
        >>> load_example_data('dataframe', ['sales'])
        # Create FeatureGroup from teradataml DataFrame and apply it to
        # the FeatureStore for the repo 'staging_repo'.
        >>> df = DataFrame("sales")
        >>> fg = FeatureGroup.from_DataFrame(name="sales", entity_columns="accounts", df=df, timestamp_col_name="datetime")
        >>> fs = FeatureStore("staging_repo")
        >>> fs.apply(fg)
        True
        # Archive first, then delete FeatureGroup with name "sales".
        >>> fs.archive_feature_group(feature_group='sales')
        FeatureGroup 'sales' is archived.
        True
        >>> fs.delete_feature_group(feature_group='sales')
        FeatureGroup 'sales' is deleted.
        True
        >>>
    """
    argument_validation_params = []
    argument_validation_params.append(["feature_group", feature_group, False, (str, FeatureGroup), True])

    # Validate argument types
    _Validators._validate_function_arguments(argument_validation_params)

    fg_name = feature_group if isinstance(feature_group, str) else feature_group.name

    # Remove the group-to-feature mapping rows first, then the group record.
    _delete_data(table_name=self.__table_names["group_features_staging"],
                 schema_name=self.__repo,
                 delete_conditions=(Col("group_name") == fg_name)
                 )

    res = _delete_data(table_name=self.__table_names["feature_group_staging"],
                       schema_name=self.__repo,
                       delete_conditions=(Col("name") == fg_name)
                       )

    if res == 1:
        print("FeatureGroup '{}' is deleted.".format(fg_name))
        return True

    # Fix: message was "not exist to delete" — corrected for grammar and
    # consistency with the other removal messages ("does not exist to ...").
    print("FeatureGroup '{}' does not exist to delete.".format(fg_name))
    return False
|
|
2244
|
+
|
|
2245
|
+
def __get_obj_df(self, obj_type):
    """
    DESCRIPTION:
        Internal method to return either Features DataFrame OR Entity
        DataFrame OR DataSource DataFrame OR FeatureGroup DataFrame.
        DataFrames are built lazily and cached in an internal container,
        so repeated lookups reuse the same object.

    PARAMETERS:
        obj_type
            Required Argument.
            Specifies the type of DataFrame to return.
            Allowed Values:
                * feature
                * feature_group
                * entity
                * data_source
                * group_features

    RETURNS:
        teradataml DataFrame.

    RAISES:
        None

    EXAMPLES:
        fs.__get_features_df()
    """
    if obj_type not in self.__df_container:
        from teradataml.dataframe.dataframe import in_schema

        # Features (regular or staging) are joined with the corresponding
        # xref table so the group name appears while listing features.
        feature_xref_map = {"feature": "group_features",
                            "feature_staging": "group_features_staging"}
        if obj_type in feature_xref_map:
            feature_df = DataFrame(in_schema(self.__repo, self.__table_names[obj_type]))
            xref_df = DataFrame(
                in_schema(self.__repo, self.__table_names[feature_xref_map[obj_type]])
            ).select(["feature_name", "group_name"])
            joined = feature_df.join(xref_df, on="name==feature_name", how='left')
            self.__df_container[obj_type] = joined.select(feature_df.columns + ["group_name"])
        elif obj_type in ("entity", "entity_staging"):
            # Entities are joined with their xref table to expose the
            # entity columns.
            entity_df = DataFrame(in_schema(self.__repo, self.__table_names[obj_type]))
            xref_df = DataFrame(
                in_schema(self.__repo, self.__table_names["{}_xref".format(obj_type)])
            ).select(['entity_name', 'entity_column'])
            joined = entity_df.join(xref_df, on="name==entity_name", how="inner")
            self.__df_container[obj_type] = joined.select(entity_df.columns + ["entity_column"])
        else:
            # Every other object type maps directly to a single table.
            self.__df_container[obj_type] = DataFrame(in_schema(self.__repo, self.__table_names[obj_type]))

    return self.__df_container[obj_type]
|
|
2294
|
+
|
|
2295
|
+
def version(self):
    """
    DESCRIPTION:
        Get the FeatureStore version.

    PARAMETERS:
        None

    RETURNS:
        str

    RAISES:
        None

    EXAMPLES:
        # Example 1: Get the FeatureStore version for the repo 'vfs_v1'.
        >>> from teradataml import FeatureStore
        >>> fs = FeatureStore('vfs_v1')
        >>> fs.version()
        '1.0.0'
        >>>
    """
    # Version is captured when the FeatureStore object is constructed.
    return self.__version
|