teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic; see the release details below.

Files changed (126)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +315 -2
  3. teradataml/__init__.py +4 -0
  4. teradataml/_version.py +1 -1
  5. teradataml/analytics/analytic_function_executor.py +95 -8
  6. teradataml/analytics/byom/__init__.py +1 -1
  7. teradataml/analytics/json_parser/metadata.py +12 -3
  8. teradataml/analytics/json_parser/utils.py +7 -2
  9. teradataml/analytics/sqle/__init__.py +5 -1
  10. teradataml/analytics/table_operator/__init__.py +1 -1
  11. teradataml/analytics/uaf/__init__.py +1 -1
  12. teradataml/analytics/utils.py +4 -0
  13. teradataml/analytics/valib.py +18 -4
  14. teradataml/automl/__init__.py +51 -6
  15. teradataml/automl/data_preparation.py +59 -35
  16. teradataml/automl/data_transformation.py +58 -33
  17. teradataml/automl/feature_engineering.py +27 -12
  18. teradataml/automl/model_training.py +73 -46
  19. teradataml/common/constants.py +88 -29
  20. teradataml/common/garbagecollector.py +2 -1
  21. teradataml/common/messagecodes.py +19 -3
  22. teradataml/common/messages.py +6 -1
  23. teradataml/common/sqlbundle.py +64 -12
  24. teradataml/common/utils.py +246 -47
  25. teradataml/common/warnings.py +11 -0
  26. teradataml/context/context.py +161 -27
  27. teradataml/data/amazon_reviews_25.csv +26 -0
  28. teradataml/data/byom_example.json +11 -0
  29. teradataml/data/dataframe_example.json +18 -2
  30. teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
  31. teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
  32. teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
  33. teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
  34. teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
  35. teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
  36. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
  37. teradataml/data/docs/sqle/docs_17_20/Shap.py +34 -6
  38. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
  39. teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
  40. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  41. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  42. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
  43. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
  44. teradataml/data/hnsw_alter_data.csv +5 -0
  45. teradataml/data/hnsw_data.csv +10 -0
  46. teradataml/data/jsons/byom/h2opredict.json +1 -1
  47. teradataml/data/jsons/byom/onnxembeddings.json +266 -0
  48. teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
  49. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
  50. teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
  51. teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
  52. teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
  53. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  54. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  55. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  56. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  57. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  58. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
  59. teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
  60. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
  61. teradataml/data/medical_readings.csv +101 -0
  62. teradataml/data/patient_profile.csv +101 -0
  63. teradataml/data/scripts/lightgbm/dataset.template +157 -0
  64. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
  65. teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
  66. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
  67. teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
  68. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
  69. teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
  70. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
  71. teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
  72. teradataml/data/target_udt_data.csv +8 -0
  73. teradataml/data/templates/open_source_ml.json +3 -2
  74. teradataml/data/teradataml_example.json +8 -0
  75. teradataml/data/vectordistance_example.json +4 -0
  76. teradataml/dataframe/copy_to.py +8 -3
  77. teradataml/dataframe/data_transfer.py +11 -1
  78. teradataml/dataframe/dataframe.py +1049 -285
  79. teradataml/dataframe/dataframe_utils.py +152 -20
  80. teradataml/dataframe/functions.py +578 -35
  81. teradataml/dataframe/setop.py +11 -6
  82. teradataml/dataframe/sql.py +185 -16
  83. teradataml/dbutils/dbutils.py +1049 -115
  84. teradataml/dbutils/filemgr.py +48 -1
  85. teradataml/hyperparameter_tuner/optimizer.py +12 -1
  86. teradataml/lib/aed_0_1.dll +0 -0
  87. teradataml/opensource/__init__.py +1 -1
  88. teradataml/opensource/_base.py +1466 -0
  89. teradataml/opensource/_class.py +464 -0
  90. teradataml/opensource/{sklearn/constants.py → _constants.py} +21 -14
  91. teradataml/opensource/_lightgbm.py +949 -0
  92. teradataml/opensource/_sklearn.py +1008 -0
  93. teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +5 -6
  94. teradataml/options/__init__.py +54 -38
  95. teradataml/options/configure.py +131 -27
  96. teradataml/options/display.py +13 -2
  97. teradataml/plot/axis.py +47 -8
  98. teradataml/plot/figure.py +33 -0
  99. teradataml/plot/plot.py +63 -13
  100. teradataml/scriptmgmt/UserEnv.py +5 -5
  101. teradataml/scriptmgmt/lls_utils.py +130 -40
  102. teradataml/store/__init__.py +12 -0
  103. teradataml/store/feature_store/__init__.py +0 -0
  104. teradataml/store/feature_store/constants.py +291 -0
  105. teradataml/store/feature_store/feature_store.py +2318 -0
  106. teradataml/store/feature_store/models.py +1505 -0
  107. teradataml/table_operators/Apply.py +32 -18
  108. teradataml/table_operators/Script.py +3 -1
  109. teradataml/table_operators/TableOperator.py +3 -1
  110. teradataml/table_operators/query_generator.py +3 -0
  111. teradataml/table_operators/table_operator_query_generator.py +3 -1
  112. teradataml/table_operators/table_operator_util.py +37 -38
  113. teradataml/table_operators/templates/dataframe_register.template +69 -0
  114. teradataml/utils/dtypes.py +51 -2
  115. teradataml/utils/internal_buffer.py +18 -0
  116. teradataml/utils/validators.py +99 -8
  117. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/METADATA +321 -5
  118. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/RECORD +121 -94
  119. teradataml/libaed_0_1.dylib +0 -0
  120. teradataml/libaed_0_1.so +0 -0
  121. teradataml/opensource/sklearn/__init__.py +0 -1
  122. teradataml/opensource/sklearn/_class.py +0 -255
  123. teradataml/opensource/sklearn/_sklearn_wrapper.py +0 -1800
  124. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/WHEEL +0 -0
  125. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/top_level.txt +0 -0
  126. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/zip-safe +0 -0
@@ -0,0 +1,2318 @@
1
+ """
2
+ Copyright (c) 2024 by Teradata Corporation. All rights reserved.
3
+ TERADATA CORPORATION CONFIDENTIAL AND TRADE SECRET
4
+
5
+ Primary Owner: pradeep.garre@teradata.com
6
+ Secondary Owner: adithya.avvaru@teradata.com
7
+
8
+ This file implements the core framework that allows user to use Teradata Enterprise Feature Store.
9
+ """
10
+
11
+ from sqlalchemy import literal_column
12
+ from teradataml.context.context import get_connection
13
+ from teradataml.common.constants import SQLConstants
14
+ from teradataml.common.exceptions import TeradataMlException
15
+ from teradataml.common.messages import Messages
16
+ from teradataml.common.messagecodes import MessageCodes
17
+ from teradataml.dataframe.sql import _SQLColumnExpression as Col
18
+ from teradataml.dbutils.dbutils import _create_database, _create_table, db_drop_table, execute_sql, Grant, Revoke, _update_data, _delete_data, db_transaction
19
+ from teradataml.store.feature_store.constants import *
20
+ from teradataml.store.feature_store.models import *
21
+ from teradataml.common.sqlbundle import SQLBundle
22
+ from teradataml.utils.validators import _Validators
23
+
24
+
25
+ class FeatureStore:
26
+ """Class for FeatureStore."""
27
+
28
+ def __init__(self, repo):
29
+ """
30
+ DESCRIPTION:
31
+ Method to create FeatureStore in teradataml.
32
+
33
+ PARAMETERS:
34
+ repo:
35
+ Required Argument.
36
+ Specifies the repository name.
37
+ Types: str.
38
+
39
+ RETURNS:
40
+ Object of FeatureStore.
41
+
42
+ RAISES:
43
+ None
44
+
45
+ EXAMPLES:
46
+ >>> # Create FeatureStore for repository 'vfs_v1'.
47
+ >>> from teradataml import FeatureStore
48
+ >>> fs = FeatureStore('vfs_v1')
49
+ >>> fs
50
+ FeatureStore(vfs_v1)-v1.0
51
+ >>>
52
+ """
53
+ argument_validation_params = []
54
+ argument_validation_params.append(["repo", repo, False, (str), True])
55
+
56
+ # Validate argument types
57
+ _Validators._validate_function_arguments(argument_validation_params)
58
+ # Do not validate the existance of repo as it consumes a network call.
59
+ self.__repo = repo
60
+ self.__version = ""
61
+
62
+ # Declare SQLBundle to use it further.
63
+ self.__sql_bundle = SQLBundle()
64
+
65
+ # Store all the DF's here so no need to create these every time.
66
+ self.__df_container = {}
67
+
68
+ # Store the table names here. Then use this where ever required.
69
+ self.__table_names = EFS_TABLES
70
+
71
+ # Declare getter's for getting the corresponding DataFrame's.
72
+ self.__get_features_df = lambda : self.__get_obj_df("feature")
73
+ self.__get_archived_features_df = lambda : self.__get_obj_df("feature_staging")
74
+ self.__get_group_features_df = lambda : self.__get_obj_df("group_features")
75
+ self.__get_archived_group_features_df = lambda : self.__get_obj_df("group_features_staging")
76
+ self.__get_feature_group_df = lambda : self.__get_obj_df("feature_group")
77
+ self.__get_archived_feature_group_df = lambda : self.__get_obj_df("feature_group_staging")
78
+ self.__get_entity_df = lambda : self.__get_obj_df("entity")
79
+ self.__get_archived_entity_df = lambda : self.__get_obj_df("entity_staging")
80
+ self.__get_data_source_df = lambda : self.__get_obj_df("data_source")
81
+ self.__get_archived_data_source_df = lambda : self.__get_obj_df("data_source_staging")
82
+
83
+ self.__good_status = "Good"
84
+ self.__bad_status = "Bad"
85
+ self.__repaired_status = "Repaired"
86
+
87
+ @property
88
+ def repo(self):
89
+ """
90
+ DESCRIPTION:
91
+ Get the repository.
92
+
93
+ PARAMETERS:
94
+ None
95
+
96
+ RETURNS:
97
+ str
98
+
99
+ RAISES:
100
+ None
101
+
102
+ EXAMPLES:
103
+ >>> from teradataml import FeatureStore
104
+ >>> fs = FeatureStore('vfs_v1')
105
+ >>> fs.repo
106
+ vfs_v1
107
+ >>>
108
+ """
109
+ return self.__repo
110
+
111
+ @repo.setter
112
+ def repo(self, value):
113
+ """
114
+ DESCRIPTION:
115
+ Set the repository.
116
+
117
+ PARAMETERS:
118
+ value:
119
+ Required Argument.
120
+ Specifies the repository name.
121
+ Types: str.
122
+
123
+ RETURNS:
124
+ None.
125
+
126
+ RAISES:
127
+ None
128
+
129
+ EXAMPLES:
130
+ # Example 1: Create a FeatureStore for repository 'abc' and
131
+ # then change the repository to 'xyz'.
132
+ >>> from teradataml import FeatureStore
133
+ >>> fs = FeatureStore('abc')
134
+ >>> fs.repo = 'xyz'
135
+ >>>
136
+ """
137
+ argument_validation_params = []
138
+ argument_validation_params.append(["value", value, False, (str), True])
139
+
140
+ # Validate argument types
141
+ _Validators._validate_function_arguments(argument_validation_params)
142
+ # remove all entries from container so they will be automatically
143
+ # point to new repo for subsequent API's.
144
+ self.__df_container.clear()
145
+ self.__version = None
146
+
147
+ # Set the repo value.
148
+ self.__repo = value
149
+
150
+ def __repr__(self):
151
+ """
152
+ DESCRIPTION:
153
+ String representation for FeatureStore object.
154
+
155
+ PARAMETERS:
156
+ None
157
+
158
+ RETURNS:
159
+ str
160
+
161
+ RAISES:
162
+ None
163
+ """
164
+ s = "VantageFeatureStore({})".format(self.__repo)
165
+ try:
166
+ version = "-v{}".format(self.__get_version())
167
+ except Exception as e:
168
+ version = ""
169
+ return "{}{}".format(s, version)
170
+
171
+ def __get_version(self):
172
+ """
173
+ DESCRIPTION:
174
+ Internal method to get the FeatureStore version.
175
+
176
+ PARAMETERS:
177
+ None
178
+
179
+ RETURNS:
180
+ str
181
+
182
+ RAISES:
183
+ None
184
+ """
185
+ if not self.__version:
186
+ sql = "SELECT version FROM {}.{}".format(self.__repo, EFS_VERSION_SPEC["table_name"])
187
+ self.__version = next(execute_sql(sql))[0]
188
+ return self.__version
189
+
190
+ @staticmethod
191
+ def list_repos() -> DataFrame:
192
+ """
193
+ DESCRIPTION:
194
+ Function to list down the repositories.
195
+
196
+ PARAMETERS:
197
+ None
198
+
199
+ RETURNS:
200
+ teradataml DataFrame
201
+
202
+ RAISES:
203
+ None
204
+
205
+ EXAMPLES:
206
+ # List down all the FeatureStore repositories.
207
+ >>> FeatureStore.list_repos()
208
+ repos
209
+ 0 vfs_v1
210
+ >>>
211
+ """
212
+ return DataFrame.from_query("select distinct DataBaseName as repos from dbc.tablesV where TableName='{}'".format(
213
+ EFS_VERSION_SPEC["table_name"]))
214
+
215
+ def setup(self, perm_size='10e9', spool_size='10e8'):
216
+ """
217
+ DESCRIPTION:
218
+ Function to setup all the required objects in Vantage for the specified
219
+ repository.
220
+ Note:
221
+ The function checks whether repository exists or not. If not exists,
222
+ it first creates the repository and then creates the corresponding tables.
223
+ Hence make sure the user with which is it connected to Vantage
224
+ has corresponding access rights for creating DataBase and creating
225
+ tables in the corresponding database.
226
+
227
+ PARAMETERS:
228
+ perm_size:
229
+ Optional Argument.
230
+ Specifies the number of bytes to allocate to FeatureStore "repo"
231
+ for permanent space.
232
+ Note:
233
+ Exponential notation can also be used.
234
+ Default Value: 10e9
235
+ Types: str or int
236
+
237
+ spool_size:
238
+ Optional Argument.
239
+ Specifies the number of bytes to allocate to FeatureStore "repo"
240
+ for spool space.
241
+ Note:
242
+ Exponential notation can also be used.
243
+ Default Value: 10e8
244
+ Types: str or int
245
+
246
+ RETURNS:
247
+ bool
248
+
249
+ RAISES:
250
+ TeradatamlException
251
+
252
+ EXAMPLES:
253
+ # Setup FeatureStore for repo 'vfs_v1'.
254
+ >>> from teradataml import FeatureStore
255
+ >>> fs = FeatureStore("vfs_v1")
256
+ >>> fs.setup()
257
+ True
258
+ >>>
259
+ """
260
+
261
+ repo_exists = get_connection().dialect._get_database_names(
262
+ get_connection(), self.__repo)
263
+
264
+ # If repo does not exist, then create it.
265
+ if not repo_exists:
266
+ _create_database(self.__repo, perm_size, spool_size)
267
+
268
+ # Check whether version table exists or not. If exist, assume all
269
+ # tables are available.
270
+ all_tables_exist = get_connection().dialect.has_table(
271
+ get_connection(), EFS_VERSION_SPEC['table_name'], schema=self.__repo)
272
+
273
+ if not all_tables_exist:
274
+ # Create the tables.
275
+ table_specs = [EFS_FEATURES_SPEC,
276
+ EFS_DATA_SOURCE_SPEC,
277
+ EFS_ENTITY_SPEC,
278
+ EFS_ENTITY_XREF_SPEC,
279
+ EFS_FEATURE_GROUP_SPEC,
280
+ EFS_GROUP_FEATURES_SPEC,
281
+ EFS_VERSION_SPEC]
282
+
283
+ staging_table_specs = [
284
+ EFS_FEATURES_STAGING_SPEC,
285
+ EFS_DATA_SOURCE_STAGING_SPEC,
286
+ EFS_ENTITY_STAGING_SPEC,
287
+ EFS_ENTITY_XREF_STAGING_SPEC,
288
+ EFS_GROUP_FEATURES_STAGING_SPEC,
289
+ EFS_FEATURE_GROUP_STAGING_SPEC
290
+ ]
291
+
292
+ triggers_specs = [
293
+ EFS_FEATURES_TRG,
294
+ EFS_GROUP_FEATURES_TRG,
295
+ EFS_FEATURE_GROUP_TRG,
296
+ EFS_DATA_SOURCE_TRG,
297
+ EFS_ENTITY_TRG,
298
+ EFS_ENTITY_XREF_TRG
299
+ ]
300
+
301
+ for table_spec in table_specs + staging_table_specs:
302
+ params_ = {"table_name": table_spec["table_name"],
303
+ "columns": table_spec["columns"],
304
+ "primary_index": table_spec.get("primary_index"),
305
+ "unique": True if table_spec.get("primary_index") else False,
306
+ "schema_name": self.__repo,
307
+ "set_table": False
308
+ }
309
+ if "foreign_keys" in table_spec:
310
+ params_["foreign_key_constraint"] = table_spec.get("foreign_keys")
311
+
312
+ _create_table(**params_)
313
+
314
+ for trigger_spec in triggers_specs:
315
+ execute_sql(trigger_spec.format(schema_name=self.__repo))
316
+
317
+ # After the setup is done, populate the version.
318
+ insert_model = "insert into {}.{} values (?, ?);".format(self.__repo, EFS_VERSION_SPEC["table_name"])
319
+ execute_sql(insert_model, (EFS_VERSION, datetime.datetime.now()))
320
+
321
+ if repo_exists and all_tables_exist:
322
+ print("EFS is already setup for the repo {}.".format(self.__repo))
323
+
324
+ @property
325
+ def grant(self):
326
+ """
327
+ DESCRIPTION:
328
+ Grants access on FeatureStore.
329
+ Note:
330
+ One must have admin access to grant access.
331
+
332
+ PARAMETERS:
333
+ None
334
+
335
+ RETURNS:
336
+ bool
337
+
338
+ RAISES:
339
+ OperationalError
340
+
341
+ EXAMPLES:
342
+ >>> from teradataml import FeatureStore
343
+ # Create FeatureStore for repo 'vfs_v1'.
344
+ >>> fs = FeatureStore("vfs_v1")
345
+ # Setup FeatureStore for this repository.
346
+ >>> fs.setup()
347
+ True
348
+
349
+ # Example 1: Grant read access on FeatureStore to user 'BoB'.
350
+ >>> fs.grant.read('BoB')
351
+ True
352
+
353
+ # Example 2: Grant write access on FeatureStore to user 'BoB'.
354
+ >>> fs.grant.write('BoB')
355
+ True
356
+
357
+ # Example 3: Grant read and write access on FeatureStore to user 'BoB'.
358
+ >>> fs.grant.read_write('BoB')
359
+ True
360
+
361
+ """
362
+ table_names = {name: UtilFuncs._get_qualified_table_name(self.__repo, table_name)
363
+ for name, table_name in EFS_TABLES.items()}
364
+ return Grant(list(table_names.values()))
365
+
366
+ @property
367
+ def revoke(self):
368
+ """
369
+ DESCRIPTION:
370
+ Revokes access on FeatureStore.
371
+ Note:
372
+ One must have admin access to revoke access.
373
+
374
+ PARAMETERS:
375
+ None
376
+
377
+ RETURNS:
378
+ bool
379
+
380
+ RAISES:
381
+ OperationalError
382
+
383
+ EXAMPLES:
384
+ >>> from teradataml import FeatureStore
385
+ # Create FeatureStore for repo 'vfs_v1'.
386
+ >>> fs = FeatureStore("vfs_v1")
387
+ # Setup FeatureStore for this repository.
388
+ >>> fs.setup()
389
+ True
390
+
391
+ # Example 1: Revoke read access on FeatureStore from user 'BoB'.
392
+ >>> fs.revoke.read('BoB')
393
+ True
394
+
395
+ # Example 2: Revoke write access on FeatureStore from user 'BoB'.
396
+ >>> fs.revoke.write('BoB')
397
+ True
398
+
399
+ # Example 3: Revoke read and write access on FeatureStore from user 'BoB'.
400
+ >>> fs.revoke.read_write('BoB')
401
+ True
402
+ """
403
+ table_names = {name: UtilFuncs._get_qualified_table_name(self.__repo, table_name)
404
+ for name, table_name in EFS_TABLES.items()}
405
+ return Revoke(list(table_names.values()))
406
+
407
+ def repair(self):
408
+ """
409
+ DESCRIPTION:
410
+ Repairs the existing repo.
411
+ Notes:
412
+ * The method checks for the corresponding missing database objects which are
413
+ required for FeatureStore. If any of the database object is not available,
414
+ then it tries to create the object.
415
+ * The method repairs only the underlying tables and not data inside the
416
+ corresponding table.
417
+
418
+ PARAMETERS:
419
+ None
420
+
421
+ RETURNS:
422
+ bool
423
+
424
+ RAISES:
425
+ TeradatamlException
426
+
427
+ EXAMPLES:
428
+ # Repair FeatureStore repo 'vfs_v1'.
429
+ >>> from teradataml import FeatureStore
430
+ >>> fs = FeatureStore("vfs_v1")
431
+ >>> fs.repair()
432
+ True
433
+ >>>
434
+ """
435
+
436
+ # Repair Features, Entities and DataSources first. Then FeatureGroup and then Group Features.
437
+ group_features_ = [EFS_GROUP_FEATURES_STAGING_SPEC, EFS_GROUP_FEATURES_SPEC, EFS_GROUP_FEATURES_TRG, "GroupFeatures"]
438
+ feature_group_ = [EFS_FEATURE_GROUP_STAGING_SPEC, EFS_FEATURE_GROUP_SPEC, EFS_FEATURE_GROUP_TRG, "FeatureGroup"]
439
+ featuers_ = [EFS_FEATURES_STAGING_SPEC, EFS_FEATURES_SPEC, EFS_FEATURES_TRG, "Feature"]
440
+ entities_ = [EFS_ENTITY_STAGING_SPEC, EFS_ENTITY_SPEC, EFS_ENTITY_TRG, "Entity"]
441
+ entities_xref_ = [EFS_ENTITY_XREF_STAGING_SPEC, EFS_ENTITY_XREF_SPEC, EFS_ENTITY_XREF_TRG, "EntityXref"]
442
+ data_sources_ = [EFS_DATA_SOURCE_STAGING_SPEC, EFS_DATA_SOURCE_SPEC, EFS_DATA_SOURCE_TRG, "DataSource"]
443
+
444
+
445
+ for staging_table_, table_, trigger, obj_name in (group_features_, feature_group_, featuers_, entities_, entities_xref_, data_sources_):
446
+ status = []
447
+ print("Repairing objects related to {}.".format(obj_name))
448
+
449
+ status.append(self.__try_create_table(staging_table_))
450
+ status.append(self.__try_create_table(table_))
451
+ status.append(self.__try_create_trigger(trigger, "{}_trg".format(table_["table_name"])))
452
+
453
+ # Let user know about status.
454
+ # If any of the status is Bad, then repair is failed.
455
+ # Else, If any of the status is Repaired, then sucessfully repaired.
456
+ # Else no need to repair the object.
457
+ if self.__bad_status in status:
458
+ print("Unable to repair objects related to {}.".format(obj_name))
459
+ else:
460
+ if self.__repaired_status in status:
461
+ print("Successfully repaired objects related to {}.".format(obj_name))
462
+ else:
463
+ print("{} objects are good and do not need any repair.".format(obj_name))
464
+
465
+ # Repair the version table.
466
+ status = self.__try_create_table(EFS_VERSION_SPEC)
467
+ if status == self.__repaired_status:
468
+ # After the setup is done, populate the version.
469
+ insert_model = "insert into {}.{} values (?, ?);".format(self.__repo, EFS_VERSION_SPEC["table_name"])
470
+ execute_sql(insert_model, (EFS_VERSION, datetime.datetime.now()))
471
+
472
+ return True
473
+
474
+ def __try_create_table(self, table_spec):
475
+ """
476
+ DESCRIPTION:
477
+ Internal function to create a table from table spec.
478
+
479
+ PARAMETERS:
480
+ table_spec:
481
+ Required Argument.
482
+ Specifies the spec for the corresponding table.
483
+ Types: dict
484
+
485
+ RETURNS:
486
+ str
487
+ Note:
488
+ Method can return three different values of strings.
489
+ * Good - When table to create already exists.
490
+ * Repaired - When is created.
491
+ * Bad - When table not exists and method unable to create table.
492
+
493
+ RAISES:
494
+ None
495
+
496
+ EXAMPLES:
497
+ self.__try_create_table(EFS_VERSION_SPEC)
498
+ """
499
+ try:
500
+ _create_table(table_spec["table_name"],
501
+ columns=table_spec["columns"],
502
+ primary_index=table_spec.get("primary_index"),
503
+ unique=True if table_spec.get("primary_index") else False,
504
+ schema_name=self.__repo,
505
+ set_table=False)
506
+ return self.__repaired_status
507
+ except Exception as e:
508
+ if "Table '{}' already exists".format(table_spec["table_name"]) in str(e):
509
+ return self.__good_status
510
+ else:
511
+ print(str(e))
512
+ return self.__bad_status
513
+
514
+ def __try_create_trigger(self, trigger_spec, trigger_name):
515
+ """
516
+ DESCRIPTION:
517
+ Internal function to create trigger.
518
+
519
+ PARAMETERS:
520
+ trigger_spec:
521
+ Required Argument.
522
+ Specifies the spec for the corresponding trigger.
523
+ Types: str
524
+
525
+ trigger_name:
526
+ Required Argument.
527
+ Specifies the name of the trigger to create.
528
+ Types: str
529
+
530
+ RETURNS:
531
+ str
532
+ Note:
533
+ Method can return three different values of strings.
534
+ * Good - When trigger to create already exists.
535
+ * Repaired - When trigger is created.
536
+ * Bad - When trigger not exists and method unable to create trigger.
537
+
538
+ RAISES:
539
+ None
540
+
541
+ EXAMPLES:
542
+ self.__try_create_trigger(EFS_FEATURE_TRIGGER_SPEC)
543
+ """
544
+ try:
545
+ execute_sql(trigger_spec.format(schema_name=self.__repo))
546
+ return self.__repaired_status
547
+ except Exception as e:
548
+ if "Trigger '{}' already exists".format(trigger_name) in str(e):
549
+ return self.__good_status
550
+ else:
551
+ print("Unable to create trigger '{}'. Error - {}".format(trigger_name, str(e)))
552
+ return self.__bad_status
553
+
554
+ def list_features(self, archived=False) -> DataFrame:
555
+ """
556
+ DESCRIPTION:
557
+ List all the features.
558
+
559
+ PARAMETERS:
560
+ archived:
561
+ Optional Argument.
562
+ Specifies whether to list effective features or archived features.
563
+ When set to False, effective features in FeatureStore are listed,
564
+ otherwise, archived features are listed.
565
+ Default Value: False
566
+ Types: bool
567
+
568
+ RETURNS:
569
+ teradataml DataFrame
570
+
571
+ RAISES:
572
+ None
573
+
574
+ EXAMPLES:
575
+ >>> from teradataml import DataFrame, FeatureStore, load_example_data
576
+ >>> load_example_data('dataframe', 'sales')
577
+ # Create FeatureStore for repo 'vfs_v1'.
578
+ >>> fs = FeatureStore("vfs_v1")
579
+ # Create teradataml DataFrame.
580
+ >>> df = DataFrame("sales")
581
+ # Create a FeatureGroup from teradataml DataFrame.
582
+ >>> fg = FeatureGroup.from_DataFrame(name='sales',
583
+ ... entity_columns='accounts',
584
+ ... df=df,
585
+ ... timestamp_col_name='datetime')
586
+ # Apply the FeatureGroup to FeatureStore.
587
+ >>> fs.apply(fg)
588
+ True
589
+
590
+ # Example 1: List all the effective Features in the repo 'vfs_v1'.
591
+ >>> fs.list_features()
592
+ column_name description creation_time modified_time tags data_type feature_type status group_name
593
+ name
594
+ Mar Mar None 2024-09-30 11:21:43.314118 None None BIGINT CONTINUOUS ACTIVE sales
595
+ Jan Jan None 2024-09-30 11:21:42.655343 None None BIGINT CONTINUOUS ACTIVE sales
596
+ Apr Apr None 2024-09-30 11:21:44.143402 None None BIGINT CONTINUOUS ACTIVE sales
597
+ Feb Feb None 2024-09-30 11:21:41.542627 None None FLOAT CONTINUOUS ACTIVE sales
598
+ >>>
599
+
600
+ # Example 2: List all the archived Features in the repo 'vfs_v1'.
601
+ # Note: Feature can only be archived when it is not associated with any Group.
602
+ # Let's remove Feature 'Feb' from FeatureGroup.
603
+ >>> fg.remove(fs.get_feature('Feb'))
604
+ True
605
+ # Apply the modified FeatureGroup to FeatureStore.
606
+ >>> fs.apply(fg)
607
+ True
608
+ # Archive Feature 'Feb'.
609
+ >>> fs.archive_feature('Feb')
610
+ Feature 'Feb' is archived.
611
+ True
612
+
613
+ # List all the archived Features in the repo 'vfs_v1'.
614
+ >>> fs.list_features(archived=True)
615
+ name column_name description creation_time modified_time tags data_type feature_type status archived_time group_name
616
+ 0 Feb Feb None 2024-09-30 11:21:41.542627 None None FLOAT CONTINUOUS ACTIVE 2024-09-30 11:30:49.160000 sales
617
+ >>>
618
+ """
619
+ return self.__get_archived_features_df() if archived else self.__get_features_df()
620
+
621
+ def list_entities(self, archived=False) -> DataFrame:
622
+ """
623
+ DESCRIPTION:
624
+ List all the entities.
625
+
626
+ PARAMETERS:
627
+ archived:
628
+ Optional Argument.
629
+ Specifies whether to list effective entities or archived entities.
630
+ When set to False, effective entities in FeatureStore are listed,
631
+ otherwise, archived entities are listed.
632
+ Default Value: False
633
+ Types: bool
634
+
635
+ RETURNS:
636
+ teradataml DataFrame
637
+
638
+ RAISES:
639
+ None
640
+
641
+ EXAMPLES:
642
+ >>> from teradataml import DataFrame, FeatureStore, load_example_data
643
+ >>> load_example_data('dataframe', 'sales')
644
+ # Create FeatureStore for repo 'vfs_v1'.
645
+ >>> fs = FeatureStore("vfs_v1")
646
+ # Create teradataml DataFrame.
647
+ >>> df = DataFrame("sales")
648
+ # Create a FeatureGroup from teradataml DataFrame.
649
+ >>> fg = FeatureGroup.from_DataFrame(name='sales',
650
+ ... entity_columns='accounts',
651
+ ... df=df,
652
+ ... timestamp_col_name='datetime')
653
+ # Apply the FeatureGroup to FeatureStore.
654
+ >>> fs.apply(fg)
655
+ True
656
+
657
+ # Example 1: List all the effective Entities in the repo 'vfs_v1'.
658
+ >>> fs.list_entities()
659
+ description
660
+ name entity_column
661
+ sales accounts None
662
+ >>>
663
+
664
+ # Example 2: List all the archived Entities in the repo 'vfs_v1'.
665
+ # Note: Entity cannot be archived if it is a part of FeatureGroup.
666
+ # First create another Entity, and update FeatureGroup with
667
+ # other Entity. Then archive Entity 'sales'.
668
+ >>> entity = Entity('store_sales', columns=df.accounts)
669
+ # Update new entity to FeatureGroup.
670
+ >>> fg.apply(entity)
671
+ # Update FeatureGroup to FeatureStore. This will update Entity
672
+ # from 'sales' to 'store_sales' for FeatureGroup 'sales'.
673
+ >>> fs.apply(fg)
674
+ True
675
+ # Let's archive Entity 'sales' since it is not part of any FeatureGroup.
676
+ >>> fs.archive_entity('sales')
677
+ Entity 'sales' is archived.
678
+ True
679
+ >>>
680
+
681
+ # List the archived entities.
682
+ >>> fs.list_entities(archived=True)
683
+ name description creation_time modified_time archived_time entity_column
684
+ 0 sales None 2024-10-18 05:41:36.932856 None 2024-10-18 05:50:00.930000 accounts
685
+ >>>
686
+ """
687
+ return self.__get_archived_entity_df() if archived else self.__get_entity_df()
688
+
689
+ def list_data_sources(self, archived=False) -> DataFrame:
690
+ """
691
+ DESCRIPTION:
692
+ List all the Data Sources.
693
+
694
+ PARAMETERS:
695
+ archived:
696
+ Optional Argument.
697
+ Specifies whether to list effective data sources or archived data sources.
698
+ When set to False, effective data sources in FeatureStore are listed,
699
+ otherwise, archived data sources are listed.
700
+ Default Value: False
701
+ Types: bool
702
+
703
+ RETURNS:
704
+ teradataml DataFrame
705
+
706
+ RAISES:
707
+ None
708
+
709
+ EXAMPLES:
710
+ >>> from teradataml import DataSource, FeatureStore, load_example_data
711
+ >>> load_example_data("dataframe", "admissions_train")
712
+ # Create teradataml DataFrame.
713
+ >>> admissions=DataFrame("admissions_train")
714
+ # Create FeatureStore for repo 'vfs_v1'.
715
+ >>> fs = FeatureStore("vfs_v1")
716
+ # Create DataSource using teradataml DataFrame.
717
+ >>> ds = DataSource(name='admissions', source=admissions)
718
+ # Apply the DataSource to FeatureStore.
719
+ >>> fs.apply(ds)
720
+ True
721
+
722
+ # Example 1: List all the effective DataSources in the repo 'vfs_v1'.
723
+ >>> fs.list_data_sources()
724
+ description timestamp_col_name source
725
+ name
726
+ admissions None None select * from "admissions_train"
727
+ >>>
728
+
729
+ # Example 2: List all the archived DataSources in the repo 'vfs_v1'.
730
+ # Let's first archive the DataSource.
731
+ >>> fs.archive_data_source('admissions')
732
+ DataSource 'admissions' is archived.
733
+ True
734
+ # List archived DataSources.
735
+ >>> fs.list_data_sources(archived=True)
736
+ description timestamp_col_name source archived_time
737
+ name
738
+ admissions None None select * from "admissions_train" 2024-09-30 12:05:39.220000
739
+ >>>
740
+ """
741
+ return self.__get_archived_data_source_df() if archived else self.__get_data_source_df()
742
+
743
+ def list_feature_groups(self, archived=False) -> DataFrame:
744
+ """
745
+ DESCRIPTION:
746
+ List all the FeatureGroups.
747
+
748
+ PARAMETERS:
749
+ archived:
750
+ Optional Argument.
751
+ Specifies whether to list effective feature groups or archived feature groups.
752
+ When set to False, effective feature groups in FeatureStore are listed,
753
+ otherwise, archived feature groups are listed.
754
+ Default Value: False
755
+ Types: bool
756
+
757
+ RETURNS:
758
+ teradataml DataFrame
759
+
760
+ RAISES:
761
+ None
762
+
763
+ EXAMPLES:
764
+ >>> from teradataml import FeatureGroup, FeatureStore, load_example_data
765
+ >>> load_example_data("dataframe", "admissions_train")
766
+ # Create teradataml DataFrame.
767
+ >>> admissions=DataFrame("admissions_train")
768
+ # Create FeatureStore for repo 'vfs_v1'.
769
+ >>> fs = FeatureStore("vfs_v1")
770
+ # Create a FeatureGroup from DataFrame.
771
+ >>> fg = FeatureGroup.from_DataFrame("admissions", df=admissions, entity_columns='id')
772
+ # Apply FeatureGroup to FeatureStore.
773
+ >>> fs.apply(fg)
774
+ True
775
+
776
+ # Example 1: List all the effective FeatureGroups in the repo 'vfs_v1'.
777
+ >>> fs.list_feature_groups()
778
+ description data_source_name entity_name
779
+ name
780
+ admissions None admissions admissions
781
+ >>>
782
+
783
+ # Example 2: List all the archived FeatureGroups in the repo 'vfs_v1'.
784
+ # Let's first archive the FeatureGroup.
785
+ >>> fs.archive_feature_group("admissions")
786
+ True
787
+ >>>
788
+ # List archived FeatureGroups.
789
+ >>> fs.list_feature_groups(archived=True)
790
+ name description data_source_name entity_name archived_time
791
+ 0 admissions None admissions admissions 2024-09-30 12:05:39.220000
792
+ >>>
793
+ """
794
+ return self.__get_archived_feature_group_df() if archived else self.__get_feature_group_df()
795
+
796
+ def get_feature(self, name):
797
+ """
798
+ DESCRIPTION:
799
+ Retrieve the feature.
800
+
801
+ PARAMETERS:
802
+ name:
803
+ Required Argument.
804
+ Specifies the name of the feature to get.
805
+ Types: str
806
+
807
+ RETURNS:
808
+ Feature.
809
+
810
+ RAISES:
811
+ TeradataMLException
812
+
813
+ EXAMPLES:
814
+ >>> from teradataml import DataFrame, FeatureStore, load_example_data
815
+ # Load the sales data to Vantage.
816
+ >>> load_example_data("dataframe", "sales")
817
+ # Create DataFrame on sales data.
818
+ >>> df = DataFrame("sales")
819
+ >>> df
820
+ Feb Jan Mar Apr datetime
821
+ accounts
822
+ Orange Inc 210.0 NaN NaN 250.0 04/01/2017
823
+ Jones LLC 200.0 150.0 140.0 180.0 04/01/2017
824
+ Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
825
+ Alpha Co 210.0 200.0 215.0 250.0 04/01/2017
826
+ Yellow Inc 90.0 NaN NaN NaN 04/01/2017
827
+ >>>
828
+ # Create Feature for column 'Mar' with name 'sales_mar'.
829
+ >>> feature = Feature('sales_mar', column=df.Mar)
830
+ # Apply the Feature to FeatureStore.
831
+ >>> fs = FeatureStore("vfs_v1")
832
+ >>> fs.apply(feature)
833
+ True
834
+
835
+ # Get the feature 'sales_mar' from repo 'vfs_v1'.
836
+ >>> feature = fs.get_feature('sales_mar')
837
+ >>> feature
838
+ Feature(name=sales_mar)
839
+ >>>
840
+ """
841
+ argument_validation_params = []
842
+ argument_validation_params.append(["name", name, False, (str), True])
843
+
844
+ # Validate argument types
845
+ _Validators._validate_function_arguments(argument_validation_params)
846
+
847
+ df = self.list_features()
848
+ df = df[df.name == name]
849
+
850
+ # Check if a feature with that name exists or not. If not, raise error.
851
+ if df.shape[0] == 0:
852
+ msg_code = MessageCodes.FUNC_EXECUTION_FAILED
853
+ error_msg = Messages.get_message(
854
+ msg_code, "get_feature()", "Feature with name '{}' does not exist.".format(name))
855
+ raise TeradataMlException(error_msg, msg_code)
856
+
857
+ return Feature._from_df(df)
858
+
859
+ def get_group_features(self, group_name):
860
+ """
861
+ DESCRIPTION:
862
+ Get the Features from the given feature group name.
863
+
864
+ PARAMETERS:
865
+ group_name:
866
+ Required Argument.
867
+ Specifies the name of the group the feature belongs to.
868
+ Types: str
869
+
870
+ RETURNS:
871
+ List of Feature objects.
872
+
873
+ RAISES:
874
+ TeradataMLException
875
+
876
+ EXAMPLES:
877
+ >>> from teradataml import DataFrame, FeatureStore, load_example_data
878
+ # Load the sales data to Vantage.
879
+ >>> load_example_data("dataframe", "sales")
880
+ # Create DataFrame on sales data.
881
+ >>> df = DataFrame("sales")
882
+ >>> df
883
+ >>> df
884
+ Feb Jan Mar Apr datetime
885
+ accounts
886
+ Orange Inc 210.0 NaN NaN 250.0 04/01/2017
887
+ Jones LLC 200.0 150.0 140.0 180.0 04/01/2017
888
+ Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
889
+ Alpha Co 210.0 200.0 215.0 250.0 04/01/2017
890
+ Yellow Inc 90.0 NaN NaN NaN 04/01/2017
891
+ >>>
892
+ # Create FeatureGroup with name 'sales' from DataFrame.
893
+ >>> fg = FeatureGroup.from_DataFrame(
894
+ ... name="sales", df=df, entity_columns="accounts", timestamp_col_name="datetime")
895
+ # Apply the FeatureGroup to FeatureStore.
896
+ >>> fs = FeatureStore("vfs_v1")
897
+ >>> fs.apply(fg)
898
+ True
899
+
900
+ # Get all the features belongs to the group 'sales' from repo 'vfs_v1'.
901
+ >>> features = fs.get_group_features('sales')
902
+ >>> features
903
+ [Feature(name=Jan), Feature(name=Feb), Feature(name=Apr), Feature(name=Mar)]
904
+ >>>
905
+ """
906
+ argument_validation_params = []
907
+ argument_validation_params.append(["group_name", group_name, False, (str), True])
908
+
909
+ # Validate argument types
910
+ _Validators._validate_function_arguments(argument_validation_params)
911
+
912
+ # Select active features.
913
+ features_df = self.__get_features_df()
914
+ features_df = features_df[((features_df.status != FeatureStatus.INACTIVE.name) & (features_df.group_name == group_name))]
915
+
916
+ # Check if a feature with that group name exists or not. If not, raise error.
917
+ if features_df.shape[0] == 0:
918
+ msg_code = MessageCodes.FUNC_EXECUTION_FAILED
919
+ error_msg = Messages.get_message(
920
+ msg_code, "get_group_features()", "No features found for group '{}'.".format(group_name))
921
+ raise TeradataMlException(error_msg, msg_code)
922
+
923
+ return Feature._from_df(features_df)
924
+
925
+ def get_feature_group(self, name):
926
+ """
927
+ DESCRIPTION:
928
+ Retrieve the FeatureGroup using name.
929
+
930
+ PARAMETERS:
931
+ name:
932
+ Required Argument.
933
+ Specifies the name of the feature group to be retrieved.
934
+ Types: str
935
+
936
+ RETURNS:
937
+ Object of FeatureGroup
938
+
939
+ RAISES:
940
+ TeradataMLException
941
+
942
+ EXAMPLES:
943
+ >>> from teradataml import DataFrame, FeatureStore, load_example_data
944
+ # Load the sales data to Vantage.
945
+ >>> load_example_data("dataframe", "sales")
946
+ # Create DataFrame on sales data.
947
+ >>> df = DataFrame("sales")
948
+ >>> df
949
+ Feb Jan Mar Apr datetime
950
+ accounts
951
+ Orange Inc 210.0 NaN NaN 250.0 04/01/2017
952
+ Jones LLC 200.0 150.0 140.0 180.0 04/01/2017
953
+ Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
954
+ Alpha Co 210.0 200.0 215.0 250.0 04/01/2017
955
+ Yellow Inc 90.0 NaN NaN NaN 04/01/2017
956
+ >>>
957
+ # Create FeatureGroup with name 'sales' from DataFrame.
958
+ >>> fg = FeatureGroup.from_DataFrame(
959
+ ... name="sales", df=df, entity_columns="accounts", timestamp_col_name="datetime")
960
+ # Apply the FeatureGroup to FeatureStore.
961
+ >>> fs = FeatureStore("vfs_v1")
962
+ >>> fs.apply(fg)
963
+ True
964
+
965
+ # Get FeatureGroup with group name 'sales' from repo 'vfs_v1'.
966
+ >>> fg = fs.get_feature_group('sales')
967
+ >>> fg
968
+ FeatureGroup(sales, features=[Feature(name=Jan), Feature(name=Feb), Feature(name=Apr), Feature(name=Mar)], entity=Entity(name=sales), data_source=DataSource(name=sales))
969
+ >>>
970
+ """
971
+ argument_validation_params = []
972
+ argument_validation_params.append(["name", name, False, (str), True])
973
+
974
+ # Validate argument types
975
+ _Validators._validate_function_arguments(argument_validation_params)
976
+
977
+ df = self.list_feature_groups()
978
+ df = df[df.name == name]
979
+
980
+ # Check if a feature with that name exists or not. If not, raise error.
981
+ if df.shape[0] == 0:
982
+ msg_code = MessageCodes.FUNC_EXECUTION_FAILED
983
+ error_msg = Messages.get_message(
984
+ msg_code, "get_feature_group()", "FeatureGroup with name '{}' does not exist.".format(name))
985
+ raise TeradataMlException(error_msg, msg_code)
986
+
987
+ return FeatureGroup._from_df(df,
988
+ self.__repo,
989
+ self.__get_features_df(),
990
+ self.__get_entity_df(),
991
+ self.__get_data_source_df()
992
+ )
993
+
994
+ def get_entity(self, name):
995
+ """
996
+ DESCRIPTION:
997
+ Get the entity from feature store.
998
+
999
+ PARAMETERS:
1000
+ name:
1001
+ Required Argument.
1002
+ Specifies the name of the entity.
1003
+ Types: str
1004
+
1005
+ RETURNS:
1006
+ Object of Entity.
1007
+
1008
+ RAISES:
1009
+ None
1010
+
1011
+ EXAMPLES:
1012
+ >>> from teradataml import DataFrame, Entity, FeatureStore, load_example_data
1013
+ # Load the admissions data to Vantage.
1014
+ >>> load_example_data("dataframe", "admissions_train")
1015
+ # Create DataFrame on admissions data.
1016
+ >>> df = DataFrame("admissions_train")
1017
+ >>> df
1018
+ masters gpa stats programming admitted
1019
+ id
1020
+ 34 yes 3.85 Advanced Beginner 0
1021
+ 32 yes 3.46 Advanced Beginner 0
1022
+ 11 no 3.13 Advanced Advanced 1
1023
+ 40 yes 3.95 Novice Beginner 0
1024
+ 38 yes 2.65 Advanced Beginner 1
1025
+ 36 no 3.00 Advanced Novice 0
1026
+ 7 yes 2.33 Novice Novice 1
1027
+ 26 yes 3.57 Advanced Advanced 1
1028
+ 19 yes 1.98 Advanced Advanced 0
1029
+ 13 no 4.00 Advanced Novice 1
1030
+ >>>
1031
+ # Create Entity for column 'id' with name 'admissions_id'.
1032
+ >>> entity = Entity(name='admissions_id', description="Entity for admissions", columns=df.id)
1033
+ # Apply the Entity to FeatureStore 'vfs_v1'.
1034
+ >>> fs = FeatureStore('vfs_v1')
1035
+ >>> fs.apply(entity)
1036
+ True
1037
+ >>>
1038
+
1039
+ # Get the Entity 'admissions_id' from repo 'vfs_v1'
1040
+ >>> entity = fs.get_entity('admissions_id')
1041
+ >>> entity
1042
+ Entity(name=admissions_id)
1043
+ >>>
1044
+ """
1045
+ argument_validation_params = []
1046
+ argument_validation_params.append(["name", name, False, (str), True])
1047
+
1048
+ # Validate argument types
1049
+ _Validators._validate_function_arguments(argument_validation_params)
1050
+
1051
+ df = self.__get_entity_df()
1052
+ df = df[df.name==name]
1053
+
1054
+ # Check if entity with that name exists or not. If not, raise error.
1055
+ if df.shape[0] == 0:
1056
+ msg_code = MessageCodes.FUNC_EXECUTION_FAILED
1057
+ error_msg = Messages.get_message(
1058
+ msg_code, "get_entity()", "Entity with name '{}' does not exist.".format(name))
1059
+ raise TeradataMlException(error_msg, msg_code)
1060
+ return Entity._from_df(df)
1061
+
1062
+ def get_data_source(self, name):
1063
+ """
1064
+ DESCRIPTION:
1065
+ Get the data source from feature store.
1066
+
1067
+ PARAMETERS:
1068
+ name:
1069
+ Required Argument.
1070
+ Specifies the name of the data source.
1071
+ Types: str
1072
+
1073
+ RETURNS:
1074
+ Object of DataSource.
1075
+
1076
+ RAISES:
1077
+ TeradataMLException
1078
+
1079
+ EXAMPLES:
1080
+ >>> from teradataml import DataFrame, DataSource, FeatureStore, load_example_data
1081
+ # Load the admissions data to Vantage.
1082
+ >>> load_example_data("dataframe", "admissions_train")
1083
+ # Create DataFrame on admissions data.
1084
+ >>> df = DataFrame("admissions_train")
1085
+ >>> df
1086
+ masters gpa stats programming admitted
1087
+ id
1088
+ 34 yes 3.85 Advanced Beginner 0
1089
+ 32 yes 3.46 Advanced Beginner 0
1090
+ 11 no 3.13 Advanced Advanced 1
1091
+ 40 yes 3.95 Novice Beginner 0
1092
+ 38 yes 2.65 Advanced Beginner 1
1093
+ 36 no 3.00 Advanced Novice 0
1094
+ 7 yes 2.33 Novice Novice 1
1095
+ 26 yes 3.57 Advanced Advanced 1
1096
+ 19 yes 1.98 Advanced Advanced 0
1097
+ 13 no 4.00 Advanced Novice 1
1098
+ >>>
1099
+ # Create DataSource using DataFrame 'df' with name 'admissions'.
1100
+ >>> ds = DataSource('admissions', source=df)
1101
+ # Apply the DataSource to FeatureStore 'vfs_v1'.
1102
+ >>> fs = FeatureStore('vfs_v1')
1103
+ >>> fs.apply(ds)
1104
+ True
1105
+ >>>
1106
+
1107
+ # Get the DataSource 'admissions' from repo 'vfs_v1'
1108
+ >>> ds = fs.get_data_source('admissions')
1109
+ >>> ds
1110
+ DataSource(name=admissions)
1111
+ >>>
1112
+ """
1113
+ argument_validation_params = []
1114
+ argument_validation_params.append(["name", name, False, (str), True])
1115
+
1116
+ # Validate argument types
1117
+ _Validators._validate_function_arguments(argument_validation_params)
1118
+
1119
+ df = self.__get_data_source_df()
1120
+ df = df[df.name == name]
1121
+
1122
+ # Check if a entity with that name exists or not. If not, raise error.
1123
+ if df.shape[0] == 0:
1124
+ msg_code = MessageCodes.FUNC_EXECUTION_FAILED
1125
+ error_msg = Messages.get_message(
1126
+ msg_code, "get_data_source()", "DataSource with name '{}' does not exist.".format(name))
1127
+ raise TeradataMlException(error_msg, msg_code)
1128
+
1129
+ return DataSource._from_df(df)
1130
+
1131
+ def set_features_inactive(self, names):
1132
+ """
1133
+ DESCRIPTION:
1134
+ Mark the feature status as 'inactive'. Note that, inactive features are
1135
+ not available for any further processing. Set the status as 'active' with
1136
+ "set_features_active()" method.
1137
+
1138
+ PARAMETERS:
1139
+ names:
1140
+ Required Argument.
1141
+ Specifies the name(s) of the feature(s).
1142
+ Types: str OR list of str
1143
+
1144
+ RETURNS:
1145
+ bool
1146
+
1147
+ RAISES:
1148
+ teradataMLException
1149
+
1150
+ EXAMPLES:
1151
+ >>> from teradataml import DataFrame, DataSource, FeatureStore, load_example_data
1152
+ # Load the admissions data to Vantage.
1153
+ >>> load_example_data("dataframe", "admissions_train")
1154
+ # Create DataFrame on admissions data.
1155
+ >>> df = DataFrame("admissions_train")
1156
+ >>> df
1157
+ masters gpa stats programming admitted
1158
+ id
1159
+ 34 yes 3.85 Advanced Beginner 0
1160
+ 32 yes 3.46 Advanced Beginner 0
1161
+ 11 no 3.13 Advanced Advanced 1
1162
+ 40 yes 3.95 Novice Beginner 0
1163
+ 38 yes 2.65 Advanced Beginner 1
1164
+ 36 no 3.00 Advanced Novice 0
1165
+ 7 yes 2.33 Novice Novice 1
1166
+ 26 yes 3.57 Advanced Advanced 1
1167
+ 19 yes 1.98 Advanced Advanced 0
1168
+ 13 no 4.00 Advanced Novice 1
1169
+ >>>
1170
+ # Create FeatureGroup from DataFrame df.
1171
+ >>> fg = FeatureGroup.from_DataFrame(name='admissions', df=df, entity_columns='id')
1172
+ # Apply the FeatureGroup to FeatureStore 'vfs_v1'.
1173
+ >>> fs = FeatureStore('vfs_v1')
1174
+ >>> fs.apply(fg)
1175
+ True
1176
+ # Get FeatureGroup 'admissions' from FeatureStore.
1177
+ >>> fg = fs.get_feature_group('admissions')
1178
+ >>> fg
1179
+ FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=programming), Feature(name=admitted), Feature(name=stats), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
1180
+
1181
+ # Set the Feature 'programming' inactive.
1182
+ >>> fs.set_features_inactive('programming')
1183
+ True
1184
+ # Get FeatureGroup again after setting feature inactive.
1185
+ >>> fg = fs.get_feature_group('admissions')
1186
+ >>> fg
1187
+ FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=stats), Feature(name=admitted), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
1188
+ >>>
1189
+ """
1190
+ return self.__set_active_inactive_features(names, active=False)
1191
+
1192
+ def set_features_active(self, names):
1193
+ """
1194
+ DESCRIPTION:
1195
+ Mark the feature status as active. Set the status as 'inactive' with
1196
+ "set_features_inactive()" method. Note that, inactive features are
1197
+ not available for any further processing.
1198
+
1199
+ PARAMETERS:
1200
+ names:
1201
+ Required Argument.
1202
+ Specifies the name(s) of the feature(s).
1203
+ Types: str OR list of str
1204
+
1205
+ RETURNS:
1206
+ bool
1207
+
1208
+ RAISES:
1209
+ teradataMLException
1210
+
1211
+ EXAMPLES:
1212
+ >>> from teradataml import DataFrame, DataSource, FeatureStore, load_example_data
1213
+ # Load the admissions data to Vantage.
1214
+ >>> load_example_data("dataframe", "admissions_train")
1215
+ # Create DataFrame on admissions data.
1216
+ >>> df = DataFrame("admissions_train")
1217
+ >>> df
1218
+ masters gpa stats programming admitted
1219
+ id
1220
+ 34 yes 3.85 Advanced Beginner 0
1221
+ 32 yes 3.46 Advanced Beginner 0
1222
+ 11 no 3.13 Advanced Advanced 1
1223
+ 40 yes 3.95 Novice Beginner 0
1224
+ 38 yes 2.65 Advanced Beginner 1
1225
+ 36 no 3.00 Advanced Novice 0
1226
+ 7 yes 2.33 Novice Novice 1
1227
+ 26 yes 3.57 Advanced Advanced 1
1228
+ 19 yes 1.98 Advanced Advanced 0
1229
+ 13 no 4.00 Advanced Novice 1
1230
+ >>>
1231
+ # Create FeatureGroup from DataFrame df.
1232
+ >>> fg = FeatureGroup.from_DataFrame(name='admissions', df=df, entity_columns='id')
1233
+ # Apply the FeatureGroup to FeatureStore 'vfs_v1'.
1234
+ >>> fs = FeatureStore('vfs_v1')
1235
+ >>> fs.apply(fg)
1236
+ True
1237
+ # Get FeatureGroup 'admissions' from FeatureStore.
1238
+ >>> fg = fs.get_feature_group('admissions')
1239
+ >>> fg
1240
+ FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=programming), Feature(name=admitted), Feature(name=stats), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
1241
+ # Set the Feature 'programming' inactive.
1242
+ >>> fs.set_features_inactive('programming')
1243
+ True
1244
+ # Get FeatureGroup again after setting feature inactive.
1245
+ >>> fg = fs.get_feature_group('admissions')
1246
+ >>> fg
1247
+ FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=stats), Feature(name=admitted), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
1248
+ >>>
1249
+
1250
+ # Mark Feature 'programming' from 'inactive' to 'active'.
1251
+ >>> fs.set_features_active('programming')
1252
+ # Get FeatureGroup again after setting feature active.
1253
+ >>> fg = fs.get_feature_group('admissions')
1254
+ >>> fg
1255
+ FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=programming), Feature(name=admitted), Feature(name=stats), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
1256
+ >>>
1257
+ """
1258
+ return self.__set_active_inactive_features(names, active=True)
1259
+
1260
+ def __set_active_inactive_features(self, names, active):
1261
+ """
1262
+ DESCRIPTION:
1263
+ Internal function to either active or inactive features.
1264
+
1265
+ PARAMETERS:
1266
+ names:
1267
+ Required Argument.
1268
+ Specifies the name the feature.
1269
+ Types: str OR list of str
1270
+
1271
+ RETURNS:
1272
+ bool
1273
+
1274
+ RAISES:
1275
+ teradataMLException
1276
+
1277
+ EXAMPLES:
1278
+ # Example 1: Archive the feature 'feature1' in the repo
1279
+ # 'vfs_v1'.
1280
+ >>> from teradataml import FeatureStore
1281
+ >>> fs = FeatureStore('vfs_v1')
1282
+ >>> fs.__archive_unarchive_features(name='feature1')
1283
+ True
1284
+ >>>
1285
+ """
1286
+ names = UtilFuncs._as_list(names)
1287
+
1288
+ argument_validation_params = []
1289
+ argument_validation_params.append(["names", names, False, (str, list), True])
1290
+
1291
+ # Validate argument types
1292
+ _Validators._validate_function_arguments(argument_validation_params)
1293
+
1294
+ status = FeatureStatus.ACTIVE.name if active else FeatureStatus.INACTIVE.name
1295
+
1296
+ _update_data(table_name=EFS_FEATURES_SPEC["table_name"],
1297
+ schema_name=self.__repo,
1298
+ update_columns_values={"status": status},
1299
+ update_conditions={"name": names}
1300
+ )
1301
+ return True
1302
+
1303
+ def apply(self, object):
1304
+ """
1305
+ DESCRIPTION:
1306
+ Register objects to repository.
1307
+
1308
+ PARAMETERS:
1309
+ object:
1310
+ Required Argument.
1311
+ Specifies the object to update the repository.
1312
+ Types: Feature OR DataSource OR Entity OR FeatureGroup.
1313
+
1314
+ RETURNS:
1315
+ bool.
1316
+
1317
+ RAISES:
1318
+ TeradataMLException
1319
+
1320
+ EXAMPLES:
1321
+ >>> load_example_data('dataframe', ['sales'])
1322
+ >>> df = DataFrame("sales")
1323
+
1324
+ # Example 1: create a Feature for column 'Feb' from 'sales' DataFrame
1325
+ # and register with repo 'vfs_v1'.
1326
+ >>> # Create Feature.
1327
+ >>> from teradataml import Feature
1328
+ >>> feature = Feature('sales:Feb', df.Feb)
1329
+ >>> # Register the above Feature with repo.
1330
+ >>> fs = FeatureStore('vfs_v1')
1331
+ >>> fs.apply(feature)
1332
+ True
1333
+ >>>
1334
+
1335
+ # Example 2: create Entity for 'sales' DataFrame and register
1336
+ # with repo 'vfs_v1'.
1337
+ >>> # Create Entity.
1338
+ >>> from teradataml import Entity
1339
+ >>> entity = Entity('sales:accounts', df.accounts)
1340
+ >>> # Register the above Entity with repo.
1341
+ >>> fs = FeatureStore('vfs_v1')
1342
+ >>> fs.apply(entity)
1343
+ True
1344
+ >>>
1345
+
1346
+ # Example 3: create DataSource for 'sales' DataFrame and register
1347
+ # with repo 'vfs_v1'.
1348
+ >>> # Create DataSource.
1349
+ >>> from teradataml import DataSource
1350
+ >>> ds = DataSource('Sales_Data', df)
1351
+ >>> # Register the above DataSource with repo.
1352
+ >>> fs = FeatureStore('vfs_v1')
1353
+ >>> fs.apply(ds)
1354
+ True
1355
+ >>>
1356
+
1357
+ # Example 4: create FeatureStore with all the objects
1358
+ # created in above examples and register with
1359
+ # repo 'vfs_v1'.
1360
+ >>> # Create FeatureGroup.
1361
+ >>> from teradataml import FeatureGroup
1362
+ >>> fg = FeatureGroup('Sales',
1363
+ ... features=feature,
1364
+ ... entity=entity,
1365
+ ... data_source=data_source)
1366
+ >>> # Register the above FeatureStore with repo.
1367
+ >>> fs = FeatureStore('vfs_v1')
1368
+ >>> fs.apply(fg)
1369
+ True
1370
+ >>>
1371
+ """
1372
+ argument_validation_params = []
1373
+ argument_validation_params.append(["name", object, False, (Feature, Entity, DataSource, FeatureGroup)])
1374
+
1375
+ # Validate argument types
1376
+ _Validators._validate_function_arguments(argument_validation_params)
1377
+ return object.publish(self.__repo)
1378
+
1379
+ def get_dataset(self, group_name):
1380
+ """
1381
+ DESCRIPTION:
1382
+ Returns teradataml DataFrame based on "group_name".
1383
+
1384
+ PARAMETERS:
1385
+ group_name:
1386
+ Required Argument.
1387
+ Specifies the name of the feature group.
1388
+ Types: str
1389
+
1390
+ RETURNS:
1391
+ teradataml DataFrame.
1392
+
1393
+ RAISES:
1394
+ TeradataMLException
1395
+
1396
+ EXAMPLES:
1397
+ >>> from teradataml import DataFrame, FeatureStore, load_example_data
1398
+ # Load the sales data to Vantage.
1399
+ >>> load_example_data("dataframe", "sales")
1400
+ # Create DataFrame on sales data.
1401
+ >>> df = DataFrame("sales")
1402
+ >>> df
1403
+ >>> df
1404
+ Feb Jan Mar Apr datetime
1405
+ accounts
1406
+ Orange Inc 210.0 NaN NaN 250.0 04/01/2017
1407
+ Jones LLC 200.0 150.0 140.0 180.0 04/01/2017
1408
+ Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
1409
+ Alpha Co 210.0 200.0 215.0 250.0 04/01/2017
1410
+ Yellow Inc 90.0 NaN NaN NaN 04/01/2017
1411
+ >>>
1412
+ # Create FeatureGroup with name 'sales' from DataFrame.
1413
+ >>> fg = FeatureGroup.from_DataFrame(
1414
+ ... name="sales", df=df, entity_columns="accounts", timestamp_col_name="datetime")
1415
+ # Apply the FeatureGroup to FeatureStore.
1416
+ >>> fs = FeatureStore("vfs_v1")
1417
+ >>> fs.apply(fg)
1418
+ True
1419
+
1420
+ # Get the DataSet for FeatureGroup 'sales'
1421
+ >>> df = fs.get_dataset('sales')
1422
+ >>> df
1423
+ datetime Jan Feb Apr Mar
1424
+ accounts
1425
+ Orange Inc 04/01/2017 NaN 210.0 250.0 NaN
1426
+ Jones LLC 04/01/2017 150.0 200.0 180.0 140.0
1427
+ Blue Inc 04/01/2017 50.0 90.0 101.0 95.0
1428
+ Alpha Co 04/01/2017 200.0 210.0 250.0 215.0
1429
+ Yellow Inc 04/01/2017 NaN 90.0 NaN NaN
1430
+ >>>
1431
+ """
1432
+ # Get the FeatureGroup first and extract all details.
1433
+ feature_group = self.get_feature_group(group_name)
1434
+ columns = [feature.column_name for feature in feature_group.features
1435
+ if feature.status != FeatureStatus.INACTIVE]
1436
+ entity_columns = feature_group.entity.columns
1437
+ source = feature_group.data_source.source
1438
+
1439
+ # Create DF from the source.
1440
+ df = DataFrame.from_query(source)
1441
+
1442
+ # Select the corresponding columns.
1443
+ required_columns = entity_columns + columns
1444
+ if feature_group.data_source.timestamp_col_name:
1445
+ columns = [col for col in columns if col != feature_group.data_source.timestamp_col_name]
1446
+ required_columns = entity_columns + [feature_group.data_source.timestamp_col_name] + columns
1447
+ return df.select(required_columns)
1448
+
1449
+ def __get_feature_group_names(self, name, type_):
1450
+ """
1451
+ DESCRIPTION:
1452
+ Internal function to get the associated group names for
1453
+ Feature or DataSource OR Entity.
1454
+
1455
+ PARAMETERS:
1456
+ name:
1457
+ Required Argument.
1458
+ Specifies the name of the Feature or DataSource or Entity.
1459
+ Types: str
1460
+
1461
+ type_:
1462
+ Required Argument.
1463
+ Specifies the type of the objects stored in feature store.
1464
+ Permitted Values:
1465
+ * feature
1466
+ * data_source
1467
+ * entity
1468
+ Types: str
1469
+
1470
+ RETURNS:
1471
+ list
1472
+
1473
+ RAISES:
1474
+ None
1475
+
1476
+ EXAMPLES:
1477
+ >>> self.__get_feature_group_names('admissions', 'data_source')
1478
+ """
1479
+ if type_ == "feature":
1480
+ df = self.__get_features_df()
1481
+ return [rec.group_name for rec in df[df.name == name].itertuples() if rec.group_name is not None]
1482
+ elif type_ == "data_source":
1483
+ df = self.__get_feature_group_df()
1484
+ return [rec.name for rec in df[df.data_source_name == name].itertuples()]
1485
+ elif type_ == "entity":
1486
+ df = self.__get_feature_group_df()
1487
+ return [rec.name for rec in df[df.entity_name == name].itertuples()]
1488
+
1489
+ def __remove_obj(self, name, type_, action="archive"):
1490
+ """
1491
+ DESCRIPTION:
1492
+ Internal function to get the remove Feature or DataSource OR
1493
+ Entity from repo.
1494
+
1495
+ PARAMETERS:
1496
+ name:
1497
+ Required Argument.
1498
+ Specifies the name of the Feature or DataSource or Entity.
1499
+ Types: str
1500
+
1501
+ type_:
1502
+ Required Argument.
1503
+ Specifies the type of "name".
1504
+ Types: str
1505
+ Permitted Values:
1506
+ * feature
1507
+ * data_source
1508
+ * entity
1509
+
1510
+ action:
1511
+ Optional Argument.
1512
+ Specifies whether to remove from staging tables or not.
1513
+ When set to True, object is removed from staging tables.
1514
+ Otherwise, object is fetched from regular tables.
1515
+ Default Value: True
1516
+ Types: bool
1517
+
1518
+ RETURNS:
1519
+ bool
1520
+
1521
+ RAISES:
1522
+ None
1523
+
1524
+ EXAMPLES:
1525
+ >>> self.__remove_obj('admissions', 'data_source')
1526
+ """
1527
+ _vars = {
1528
+ "data_source": {"class": DataSource, "error_msg": "Update these FeatureGroups with other DataSources"},
1529
+ "entity": {"class": Entity, "error_msg": "Update these FeatureGroups with other Entities"},
1530
+ "feature": {"class": Feature, "error_msg": "Remove the Feature from FeatureGroup"},
1531
+ }
1532
+ c_name_ = _vars[type_]["class"].__name__
1533
+ argument_validation_params = []
1534
+ argument_validation_params.append([type_, name, False, (str, _vars[type_]["class"]), True])
1535
+
1536
+ # Validate argument types
1537
+ _Validators._validate_function_arguments(argument_validation_params)
1538
+ # Extract the name if argument is class type.
1539
+ if isinstance(name, _vars[type_]["class"]):
1540
+ name = name.name
1541
+
1542
+ # Before removing it, check if it is associated with any FeatureGroup.
1543
+ # If yes, raise error. Applicable only for Archive.
1544
+ if action == "archive":
1545
+ feature_groups = self.__get_feature_group_names(name, type_)
1546
+ if feature_groups:
1547
+ feature_groups = ", ".join(("'{}'".format(fg) for fg in feature_groups))
1548
+ message = ("{} '{}' is associated with FeatureGroups {}. {} and try deleting again.".format(
1549
+ c_name_, name, feature_groups, _vars[type_]["error_msg"]))
1550
+ raise TeradataMlException(Messages.get_message(
1551
+ MessageCodes.FUNC_EXECUTION_FAILED, '{}_{}'.format(action, type_), message),
1552
+ MessageCodes.FUNC_EXECUTION_FAILED)
1553
+
1554
+ if type_ == "entity":
1555
+ res = self._remove_entity(name, action)
1556
+ else:
1557
+ table_name = self.__table_names[type_]
1558
+ if action == "delete":
1559
+ table_name = self.__table_names["{}_staging".format(type_)]
1560
+
1561
+ res = _delete_data(table_name=table_name,
1562
+ schema_name=self.__repo,
1563
+ delete_conditions=(Col("name") == name)
1564
+ )
1565
+
1566
+ if res == 1:
1567
+ print("{} '{}' is {}d.".format(c_name_, name, action))
1568
+ return True
1569
+ else:
1570
+ print("{} '{}' does not exist to {}.".format(c_name_, name, action))
1571
+ return False
1572
+
1573
+ @db_transaction
1574
+ def _remove_entity(self, name, action):
1575
+ """
1576
+ DESCRIPTION:
1577
+ Internal function to get the remove Entity from repo.
1578
+
1579
+ PARAMETERS:
1580
+ name:
1581
+ Required Argument.
1582
+ Specifies the name of the Entity.
1583
+ Types: str
1584
+
1585
+ action:
1586
+ Required Argument.
1587
+ Specifies whether to remove from staging tables or not.
1588
+ When set to "delete", Entity is removed from staging tables.
1589
+ Otherwise, Entity is removed from regular tables.
1590
+ Types: str
1591
+
1592
+ RETURNS:
1593
+ bool
1594
+
1595
+ RAISES:
1596
+ None
1597
+
1598
+ EXAMPLES:
1599
+ >>> self._remove_entity('admissions', 'delete')
1600
+ """
1601
+ ent_table = self.__table_names["entity"]
1602
+ ent_table_xref = self.__table_names["entity_xref"]
1603
+ if action == "delete":
1604
+ ent_table = self.__table_names["entity_staging"]
1605
+ ent_table_xref = self.__table_names["entity_staging_xref"]
1606
+
1607
+ # remove it from xref table first.
1608
+ _delete_data(table_name=ent_table_xref,
1609
+ schema_name=self.__repo,
1610
+ delete_conditions=(Col("entity_name") == name)
1611
+ )
1612
+
1613
+ # remove from entity table.
1614
+ res = _delete_data(table_name=ent_table,
1615
+ schema_name=self.__repo,
1616
+ delete_conditions=(Col("name") == name)
1617
+ )
1618
+
1619
+ return res
1620
+
1621
+ def archive_data_source(self, data_source):
1622
+ """
1623
+ DESCRIPTION:
1624
+ Archives DataSource from repository. Note that archived DataSource
1625
+ is not available for any further processing. Archived DataSource can be
1626
+ viewed using "list_archived_data_sources()" method.
1627
+
1628
+ PARAMETERS:
1629
+ data_source:
1630
+ Required Argument.
1631
+ Specifies either the name of DataSource or Object of DataSource
1632
+ to archive from repository.
1633
+ Types: str OR DataSource
1634
+
1635
+ RETURNS:
1636
+ bool
1637
+
1638
+ RAISES:
1639
+ TeradataMLException, TypeError, ValueError
1640
+
1641
+ EXAMPLES:
1642
+ >>> from teradataml import DataSource, FeatureStore, load_example_data
1643
+ # Create a DataSource using SELECT statement.
1644
+ >>> ds = DataSource(name="sales_data", source="select * from sales")
1645
+ # Create FeatureStore for repo 'vfs_v1'.
1646
+ >>> fs = FeatureStore("vfs_v1")
1647
+ # Apply DataSource to FeatureStore.
1648
+ >>> fs.apply(ds)
1649
+ True
1650
+ # List the available DataSources.
1651
+ >>> fs.list_data_sources()
1652
+ description timestamp_col_name source
1653
+ name
1654
+ sales_data None None select * from sales
1655
+
1656
+ # Archive DataSource with name "sales_data".
1657
+ >>> fs.archive_data_source("sales_data")
1658
+ DataSource 'sales_data' is archived.
1659
+ True
1660
+ >>>
1661
+ # List the available DataSources after archive.
1662
+ >>> fs.list_data_sources()
1663
+ Empty DataFrame
1664
+ Columns: [description, timestamp_col_name, source]
1665
+ Index: []
1666
+ """
1667
+ return self.__remove_obj(name=data_source, type_="data_source")
1668
+
1669
+ def delete_data_source(self, data_source):
1670
+ """
1671
+ DESCRIPTION:
1672
+ Removes the archived DataSource from repository.
1673
+
1674
+ PARAMETERS:
1675
+ data_source:
1676
+ Required Argument.
1677
+ Specifies either the name of DataSource or Object of DataSource
1678
+ to remove from repository.
1679
+ Types: str OR DataSource
1680
+
1681
+ RETURNS:
1682
+ bool.
1683
+
1684
+ RAISES:
1685
+ TeradataMLException, TypeError, ValueError
1686
+
1687
+ EXAMPLES:
1688
+ >>> from teradataml import DataFrame, DataSource, FeatureStore, load_example_data
1689
+ >>> load_example_data('dataframe', ['sales'])
1690
+ # Create teradataml DataFrame.
1691
+ >>> df = DataFrame("sales")
1692
+ # Create DataSource with source as teradataml DataFrame.
1693
+ >>> ds = DataSource(name="sales_data", source=df)
1694
+ # # Create FeatureStore for repo 'vfs_v1'.
1695
+ >>> fs = FeatureStore("vfs_v1")
1696
+ # Apply the DataSource to FeatureStore.
1697
+ >>> fs.apply(ds)
1698
+ True
1699
+ # Let's first archive the DataSource.
1700
+ >>> fs.archive_data_source("sales_data")
1701
+ DataSource 'sales_data' is archived.
1702
+ True
1703
+
1704
+ # Delete DataSource with name "sales_data".
1705
+ >>> fs.delete_data_source("sales_data")
1706
+ DataSource 'sales_data' is deleted.
1707
+ True
1708
+ >>>
1709
+ """
1710
+ return self.__remove_obj(name=data_source, type_="data_source", action="delete")
1711
+
1712
+ def archive_feature(self, feature):
1713
+ """
1714
+ DESCRIPTION:
1715
+ Archives Feature from repository. Note that archived Feature
1716
+ is not available for any further processing. Archived Feature can be
1717
+ viewed using "list_archived_features()" method.
1718
+
1719
+ PARAMETERS:
1720
+ feature:
1721
+ Required Argument.
1722
+ Specifies either the name of Feature or Object of Feature
1723
+ to archive from repository.
1724
+ Types: str OR Feature
1725
+
1726
+ RETURNS:
1727
+ bool
1728
+
1729
+ RAISES:
1730
+ TeradataMLException, TypeError, ValueError
1731
+
1732
+ EXAMPLES:
1733
+ >>> from teradataml import DataFrame, Feature, FeatureStore
1734
+ >>> load_example_data('dataframe', ['sales'])
1735
+ # Create teradataml DataFrame.
1736
+ >>> df = DataFrame("sales")
1737
+ # Create Feature for Column 'Feb'.
1738
+ >>> feature = Feature(name="sales_data_Feb", column=df.Feb)
1739
+ # Create FeatureStore for the repo 'staging_repo'.
1740
+ >>> fs = FeatureStore("staging_repo")
1741
+ # Apply the Feature to FeatureStore.
1742
+ >>> fs.apply(feature)
1743
+ True
1744
+ # List the available Features.
1745
+ >>> fs.list_features()
1746
+ column_name description creation_time modified_time tags data_type feature_type status group_name
1747
+ name
1748
+ sales_data_Feb Feb None 2024-10-03 18:21:03.720464 None None FLOAT CONTINUOUS ACTIVE None
1749
+
1750
+ # Archive Feature with name "sales_data_Feb".
1751
+ >>> fs.archive_feature(feature=feature)
1752
+ Feature 'sales_data_Feb' is archived.
1753
+ True
1754
+ # List the available Features after archive.
1755
+ >>> fs.list_features()
1756
+ Empty DataFrame
1757
+ Columns: [column_name, description, creation_time, modified_time, tags, data_type, feature_type, status, group_name]
1758
+ Index: []
1759
+ >>>
1760
+ """
1761
+ return self.__remove_obj(name=feature, type_="feature")
1762
+
1763
+ def delete(self):
1764
+ """
1765
+ DESCRIPTION:
1766
+ Removes the FeatureStore and its components from repository.
1767
+ Notes:
1768
+ * The function removes all the associated database objects along with data.
1769
+ Be cautious while using this function.
1770
+ * The function tries to remove the underlying Database also once
1771
+ all the Feature Store objects are removed.
1772
+ * The user must have permission on the database used by this Feature Store
1773
+ * to drop triggers.
1774
+ * to drop the tables.
1775
+ * to drop the Database.
1776
+ * If the user lacks any of the mentioned permissions, Teradata recommends
1777
+ to not use this function.
1778
+
1779
+ PARAMETERS:
1780
+ None
1781
+
1782
+ RETURNS:
1783
+ bool.
1784
+
1785
+ RAISES:
1786
+ None
1787
+
1788
+ EXAMPLES:
1789
+ # Setup FeatureStore for repo 'vfs_v1'.
1790
+ >>> from teradataml import FeatureStore
1791
+ >>> fs = FeatureStore("vfs_v1")
1792
+ >>> fs.setup()
1793
+ True
1794
+ >>> # Delete FeatureStore.
1795
+ >>> fs.delete()
1796
+ True
1797
+ >>>
1798
+ """
1799
+ confirmation = input("The function removes Feature Store and drops the "
1800
+ "corresponding repo also. Are you sure you want to proceed? (Y/N): ")
1801
+
1802
+ if confirmation in ["Y", "y"]:
1803
+ return self.__drop_feature_store_objects(self.__repo)
1804
+
1805
+ return False
1806
+
1807
+ @staticmethod
1808
+ def __drop_feature_store_objects(repo_name):
1809
+ """
1810
+ DESCRIPTION:
1811
+ Removes the FeatureStore and it's components from repository.
1812
+
1813
+ PARAMETERS:
1814
+ repo_name:
1815
+ Required Argument.
1816
+ Specifies the name of the repository.
1817
+ Types: str
1818
+
1819
+ RETURNS:
1820
+ bool
1821
+ """
1822
+ # Drop all the tables and staging tables.
1823
+ tables_ = [
1824
+ EFS_GROUP_FEATURES_SPEC["table_name"],
1825
+ EFS_FEATURE_GROUP_SPEC["table_name"],
1826
+ EFS_FEATURES_SPEC['table_name'],
1827
+ EFS_ENTITY_XREF_SPEC['table_name'],
1828
+ EFS_ENTITY_SPEC["table_name"],
1829
+ EFS_DATA_SOURCE_SPEC["table_name"]
1830
+ ]
1831
+
1832
+ tables_stg_ = [
1833
+ EFS_FEATURES_STAGING_SPEC['table_name'],
1834
+ EFS_ENTITY_STAGING_SPEC["table_name"],
1835
+ EFS_ENTITY_XREF_STAGING_SPEC["table_name"],
1836
+ EFS_DATA_SOURCE_STAGING_SPEC["table_name"],
1837
+ EFS_FEATURE_GROUP_STAGING_SPEC["table_name"],
1838
+ EFS_GROUP_FEATURES_STAGING_SPEC["table_name"]
1839
+ ]
1840
+
1841
+ # Drop all the triggers first. So that tables can be dropped.
1842
+ triggers = ["{}_trg".format(table) for table in tables_]
1843
+ for trigger in triggers:
1844
+ execute_sql("drop trigger {}.{}".format(repo_name, trigger))
1845
+
1846
+ for table in (tables_ + [EFS_VERSION_SPEC["table_name"]] + tables_stg_):
1847
+ db_drop_table(table, schema_name=repo_name)
1848
+
1849
+ execute_sql("DROP DATABASE {}".format(repo_name))
1850
+
1851
+ return True
1852
+
1853
+ def delete_feature(self, feature):
1854
+ """
1855
+ DESCRIPTION:
1856
+ Removes the archived Feature from repository.
1857
+
1858
+ PARAMETERS:
1859
+ feature:
1860
+ Required Argument.
1861
+ Specifies either the name of Feature or Object of Feature
1862
+ to remove from repository.
1863
+ Types: str OR Feature
1864
+
1865
+ RETURNS:
1866
+ bool.
1867
+
1868
+ RAISES:
1869
+ TeradataMLException, TypeError, ValueError
1870
+
1871
+ EXAMPLES:
1872
+ >>> from teradataml import DataFrame, Feature, FeatureStore
1873
+ >>> load_example_data('dataframe', ['sales'])
1874
+ # Create teradataml DataFrame.
1875
+ >>> df = DataFrame("sales")
1876
+ # Create Feature for Column 'Feb'.
1877
+ >>> feature = Feature(name="sales_data_Feb", column=df.Feb)
1878
+ # Create a feature store with name "staging_repo".
1879
+ >>> fs = FeatureStore("staging_repo")
1880
+ # Add the feature created above in the feature store.
1881
+ >>> fs.apply(feature)
1882
+ True
1883
+ # Let's first archive the Feature.
1884
+ >>> fs.archive_feature(feature=feature)
1885
+ Feature 'sales_data_Feb' is archived.
1886
+ True
1887
+
1888
+ # Delete Feature with name "sales_data_Feb".
1889
+ >>> fs.delete_feature(feature=feature)
1890
+ Feature 'sales_data_Feb' is deleted.
1891
+ True
1892
+ >>>
1893
+ """
1894
+ return self.__remove_obj(name=feature, type_="feature", action="delete")
1895
+
1896
+ def archive_entity(self, entity):
1897
+ """
1898
+ DESCRIPTION:
1899
+ Archives Entity from repository. Note that archived Entity
1900
+ is not available for any further processing. Archived Entity can be
1901
+ viewed using "list_archived_entities()" method.
1902
+
1903
+ PARAMETERS:
1904
+ entity:
1905
+ Required Argument.
1906
+ Specifies either the name of Entity or Object of Entity
1907
+ to remove from repository.
1908
+ Types: str OR Entity
1909
+
1910
+ RETURNS:
1911
+ bool.
1912
+
1913
+ RAISES:
1914
+ TeradataMLException, TypeError, ValueError
1915
+
1916
+ EXAMPLES:
1917
+ >>> from teradataml import DataFrame, Entity, FeatureStore
1918
+ >>> load_example_data('dataframe', ['sales'])
1919
+ # Create teradataml DataFrame.
1920
+ >>> df = DataFrame("sales")
1921
+ # Create Entity using teradataml DataFrame Column.
1922
+ >>> entity = Entity(name="sales_data", columns=df.accounts)
1923
+ # Create FeatureStore for repo 'staging_repo'.
1924
+ >>> fs = FeatureStore("staging_repo")
1925
+ # Apply the entity to FeatureStore.
1926
+ >>> fs.apply(entity)
1927
+ True
1928
+ # List all the available entities.
1929
+ >>> fs.list_entities()
1930
+ description
1931
+ name entity_column
1932
+ sales_data accounts None
1933
+
1934
+ # Archive Entity with name "sales_data".
1935
+ >>> fs.archive_entity(entity=entity.name)
1936
+ Entity 'sales_data' is archived.
1937
+ True
1938
+ # List the entities after archive.
1939
+ >>> fs.list_entities()
1940
+ Empty DataFrame
1941
+ Columns: [description]
1942
+ Index: []
1943
+ """
1944
+ return self.__remove_obj(name=entity, type_="entity")
1945
+
1946
+ def delete_entity(self, entity):
1947
+ """
1948
+ DESCRIPTION:
1949
+ Removes archived Entity from repository.
1950
+
1951
+ PARAMETERS:
1952
+ entity:
1953
+ Required Argument.
1954
+ Specifies either the name of Entity or Object of Entity
1955
+ to delete from repository.
1956
+ Types: str OR Entity
1957
+
1958
+ RETURNS:
1959
+ bool.
1960
+
1961
+ RAISES:
1962
+ TeradataMLException, TypeError, ValueError
1963
+
1964
+ EXAMPLES:
1965
+ >>> from teradataml import DataFrame, Entity, FeatureStore
1966
+ >>> load_example_data('dataframe', ['sales'])
1967
+ # Create teradataml DataFrame.
1968
+ >>> df = DataFrame("sales")
1969
+ # Create Entity using teradataml DataFrame Column.
1970
+ >>> entity = Entity(name="sales_data", columns=df.accounts)
1971
+ # Create FeatureStore for repo 'staging_repo'.
1972
+ >>> fs = FeatureStore("staging_repo")
1973
+ # Apply the entity to FeatureStore.
1974
+ >>> fs.apply(entity)
1975
+ True
1976
+ # Let's first archive the entity.
1977
+ >>> fs.archive_entity(entity=entity.name)
1978
+ Entity 'sales_data' is archived.
1979
+ True
1980
+
1981
+ # Delete Entity with name "sales_data".
1982
+ >>> fs.delete_entity(entity=entity.name)
1983
+ Entity 'sales_data' is deleted.
1984
+ True
1985
+ >>>
1986
+ """
1987
+ return self.__remove_obj(name=entity, type_="entity", action="delete")
1988
+
1989
+ def __get_features_where_clause(self, features):
1990
+ """
1991
+ Internal function to prepare a where clause on features df.
1992
+ """
1993
+ col_expr = Col("name") == features[0]
1994
+ for feature in features[1:]:
1995
+ col_expr = ((col_expr) | (Col("name") == feature))
1996
+
1997
+ return col_expr
1998
+
1999
+ def archive_feature_group(self, feature_group):
2000
+ """
2001
+ DESCRIPTION:
2002
+ Archives FeatureGroup from repository. Note that archived FeatureGroup
2003
+ is not available for any further processing. Archived FeatureGroup can be
2004
+ viewed using "list_archived_feature_groups()" method.
2005
+ Note:
2006
+ The function archives the associated Features, Entity and DataSource
2007
+ if they are not associated with any other FeatureGroups.
2008
+
2009
+ PARAMETERS:
2010
+ feature_group:
2011
+ Required Argument.
2012
+ Specifies either the name of FeatureGroup or Object of FeatureGroup
2013
+ to archive from repository.
2014
+ Types: str OR FeatureGroup
2015
+
2016
+ RETURNS:
2017
+ bool.
2018
+
2019
+ RAISES:
2020
+ TeradataMLException, TypeError, ValueError
2021
+
2022
+ EXAMPLES:
2023
+ >>> from teradataml import DataFrame, FeatureGroup, FeatureStore
2024
+ >>> load_example_data('dataframe', ['sales'])
2025
+ # Create teradataml DataFrame.
2026
+ >>> df = DataFrame("sales")
2027
+ # Create FeatureGroup from teradataml DataFrame.
2028
+ >>> fg = FeatureGroup.from_DataFrame(name="sales", entity_columns="accounts", df=df, timestamp_col_name="datetime")
2029
+ # Create FeatureStore for the repo 'staging_repo'.
2030
+ >>> fs = FeatureStore("staging_repo")
2031
+ # Apply FeatureGroup to FeatureStore.
2032
+ >>> fs.apply(fg)
2033
+ True
2034
+ # List all the available FeatureGroups.
2035
+ >>> fs.list_feature_groups()
2036
+ description data_source_name entity_name
2037
+ name
2038
+ sales None sales sales
2039
+
2040
+ # Archive FeatureGroup with name "sales".
2041
+ >>> fs.archive_feature_group(feature_group='sales')
2042
+ FeatureGroup 'sales' is archived.
2043
+ True
2044
+ >>>
2045
+ # List all the available FeatureGroups after archive.
2046
+ >>> fs.list_feature_groups()
2047
+ Empty DataFrame
2048
+ Columns: [description, data_source_name, entity_name]
2049
+ Index: []
2050
+ """
2051
+ argument_validation_params = []
2052
+ argument_validation_params.append(["feature_group", feature_group, False, (str, FeatureGroup), True])
2053
+
2054
+ # Validate argument types
2055
+ _Validators._validate_function_arguments(argument_validation_params)
2056
+
2057
+ feature_group_name = feature_group if isinstance(feature_group, str) else feature_group.name
2058
+
2059
+ fg = self.get_feature_group(feature_group_name) if isinstance(feature_group, str) else feature_group
2060
+
2061
+ fg_df = self.list_feature_groups()
2062
+
2063
+ # Find out shared Features. Extract the features which are mapped to
2064
+ # other groups. They can not be deleted.
2065
+ feature_names = [f.name for f in fg.features]
2066
+ features_df = self.list_features()
2067
+ col_expr = self.__get_features_where_clause(feature_names)
2068
+ features_df = features_df[((features_df.group_name != fg.name) & (col_expr))]
2069
+ shared_features = [f.name for f in features_df.drop_duplicate('name').itertuples()]
2070
+ feature_names_to_remove = [f for f in feature_names if f not in shared_features]
2071
+
2072
+ # Find out shared Entities. If entity is not shared, then update 'entity_name'
2073
+ # to update value.
2074
+ entity_name = None
2075
+ ent = fg_df[((fg_df.entity_name == fg.entity.name) & (fg_df.name != fg.name))]
2076
+ recs = ent.shape[0]
2077
+ if recs == 0:
2078
+ entity_name = fg.entity.name
2079
+
2080
+ # Find out shared DataSources. If datasource is not shared, then update 'data_source_name'.
2081
+ data_source_name = None
2082
+ ds_df = fg_df[((fg_df.data_source_name == fg.data_source.name) & (fg_df.name != fg.name))]
2083
+ recs = ds_df.shape[0]
2084
+ if recs == 0:
2085
+ data_source_name = fg.data_source.name
2086
+
2087
+ res = self._archive_feature_group(fg.name, feature_names_to_remove, entity_name, data_source_name)
2088
+
2089
+ if res == 1:
2090
+ print("FeatureGroup '{}' is archived.".format(feature_group_name))
2091
+ return True
2092
+
2093
+ print("FeatureGroup '{}' not exist to archive.".format(feature_group_name))
2094
+ return False
2095
+
2096
+ @db_transaction
2097
+ def _archive_feature_group(self, group_name, feature_names, entity_name, data_source_name):
2098
+ """
2099
+ DESCRIPTION:
2100
+ Internal method to archive FeatureGroup from repository.
2101
+
2102
+ PARAMETERS:
2103
+ group_name:
2104
+ Required Argument.
2105
+ Specifies the name of FeatureGroup to archive from repository.
2106
+ Types: str
2107
+
2108
+ feature_names:
2109
+ Required Argument.
2110
+ Specifies the name of Features to archive from repository.
2111
+ Types: list
2112
+
2113
+ entity_name:
2114
+ Required Argument.
2115
+ Specifies the name of Entity to archive from repository.
2116
+ Types: str
2117
+
2118
+ data_source_name:
2119
+ Required Argument.
2120
+ Specifies the name of DataSource to archive from repository.
2121
+ Types: str
2122
+
2123
+ RETURNS:
2124
+ bool.
2125
+
2126
+ RAISES:
2127
+ OperationalError
2128
+
2129
+ EXAMPLES:
2130
+ >>> self._archive_feature_group("group1", ["feature1"], "entity_name", None)
2131
+ """
2132
+ # Remove data for FeatureGroup from Xref table.
2133
+ # This allows to remove data from other tables.
2134
+ res = _delete_data(schema_name=self.__repo,
2135
+ table_name=EFS_GROUP_FEATURES_SPEC["table_name"],
2136
+ delete_conditions=(Col("group_name") == group_name)
2137
+ )
2138
+
2139
+ # Remove FeatureGroup.
2140
+ res = _delete_data(schema_name=self.__repo,
2141
+ table_name=EFS_FEATURE_GROUP_SPEC["table_name"],
2142
+ delete_conditions=(Col("name") == group_name)
2143
+ )
2144
+
2145
+ # Remove Features.
2146
+ if feature_names:
2147
+ _delete_data(schema_name=self.__repo,
2148
+ table_name=EFS_FEATURES_SPEC["table_name"],
2149
+ delete_conditions=self.__get_features_where_clause(feature_names)
2150
+ )
2151
+
2152
+ # Remove entities.
2153
+ if entity_name:
2154
+ _delete_data(schema_name=self.__repo,
2155
+ table_name=EFS_ENTITY_XREF_SPEC["table_name"],
2156
+ delete_conditions=(Col("entity_name") == entity_name)
2157
+ )
2158
+
2159
+ _delete_data(schema_name=self.__repo,
2160
+ table_name=EFS_ENTITY_SPEC["table_name"],
2161
+ delete_conditions=(Col("name") == entity_name)
2162
+ )
2163
+
2164
+ # Remove DataSource.
2165
+ if data_source_name:
2166
+ _delete_data(schema_name=self.__repo,
2167
+ table_name=EFS_DATA_SOURCE_SPEC["table_name"],
2168
+ delete_conditions=(Col("name") == data_source_name),
2169
+ )
2170
+
2171
+ return res
2172
+
2173
+ @db_transaction
2174
+ def delete_feature_group(self, feature_group):
2175
+ """
2176
+ DESCRIPTION:
2177
+ Removes archived FeatureGroup from repository.
2178
+ Note:
2179
+ Unlike 'archive_feature_group()', this function does not delete the
2180
+ associated Features, Entity and DataSource. One should delete those
2181
+ using 'delete_feature()', 'delete_entity()' and 'delete_data_source()'.
2182
+
2183
+ PARAMETERS:
2184
+ feature_group:
2185
+ Required Argument.
2186
+ Specifies either the name of FeatureGroup or Object of FeatureGroup
2187
+ to delete from repository.
2188
+ Types: str OR FeatureGroup
2189
+
2190
+ RETURNS:
2191
+ bool
2192
+
2193
+ RAISES:
2194
+ TeradataMLException, TypeError, ValueError
2195
+
2196
+ EXAMPLES:
2197
+ >>> from teradataml import DataFrame, FeatureGroup, FeatureStore
2198
+ >>> load_example_data('dataframe', ['sales'])
2199
+ # Create teradataml DataFrame.
2200
+ >>> df = DataFrame("sales")
2201
+ # Create FeatureGroup from teradataml DataFrame.
2202
+ >>> fg = FeatureGroup.from_DataFrame(name="sales", entity_columns="accounts", df=df, timestamp_col_name="datetime")
2203
+ # Create FeatureStore for the repo 'staging_repo'.
2204
+ >>> fs = FeatureStore("staging_repo")
2205
+ # Apply FeatureGroup to FeatureStore.
2206
+ >>> fs.apply(fg)
2207
+ True
2208
+ # Let's first archive FeatureGroup with name "sales".
2209
+ >>> fs.archive_feature_group(feature_group='sales')
2210
+ FeatureGroup 'sales' is archived.
2211
+ True
2212
+
2213
+ # Delete FeatureGroup with name "sales".
2214
+ >>> fs.delete_feature_group(feature_group='sales')
2215
+ FeatureGroup 'sales' is deleted.
2216
+ True
2217
+ >>>
2218
+ """
2219
+ argument_validation_params = []
2220
+ argument_validation_params.append(["feature_group", feature_group, False, (str, FeatureGroup), True])
2221
+
2222
+ # Validate argument types
2223
+ _Validators._validate_function_arguments(argument_validation_params)
2224
+
2225
+ fg_name = feature_group if isinstance(feature_group, str) else feature_group.name
2226
+
2227
+ # Remove data for FeatureGroup.
2228
+ _delete_data(table_name=self.__table_names["group_features_staging"],
2229
+ schema_name=self.__repo,
2230
+ delete_conditions=(Col("group_name") == fg_name)
2231
+ )
2232
+
2233
+ res = _delete_data(table_name=self.__table_names["feature_group_staging"],
2234
+ schema_name=self.__repo,
2235
+ delete_conditions=(Col("name") == fg_name)
2236
+ )
2237
+
2238
+ if res == 1:
2239
+ print("FeatureGroup '{}' is deleted.".format(fg_name))
2240
+ return True
2241
+
2242
+ print("FeatureGroup '{}' not exist to delete.".format(fg_name))
2243
+ return False
2244
+
2245
+ def __get_obj_df(self, obj_type):
2246
+ """
2247
+ DESCRIPTION:
2248
+ Internal method to return either Features DataFrame OR Entity DataFrame
2249
+ OR DataSource DataFrame OR FeatureGroup DataFrame.
2250
+
2251
+ PARAMETERS:
2252
+ obj_type
2253
+ Required Argument.
2254
+ Specifies the type of DataFrame to return.
2255
+ Allowed Values:
2256
+ * feature
2257
+ * feature_group
2258
+ * entity
2259
+ * data_source
2260
+ * group_features
2261
+
2262
+ RETURNS:
2263
+ teradataml DataFrame.
2264
+
2265
+ RAISES:
2266
+ None
2267
+
2268
+ EXAMPLES:
2269
+ fs.__get_features_df()
2270
+ """
2271
+ if obj_type not in self.__df_container:
2272
+ from teradataml.dataframe.dataframe import in_schema
2273
+
2274
+ # For feature or feature_staging, join it with xref table
2275
+ # so group name appears while listing features.
2276
+ map_ = {"feature": "group_features", "feature_staging": "group_features_staging"}
2277
+ if obj_type in map_:
2278
+ features = DataFrame(in_schema(self.__repo, self.__table_names[obj_type]))
2279
+ features_xref = DataFrame(in_schema(self.__repo, self.__table_names[map_[obj_type]])).select(
2280
+ ["feature_name", "group_name"])
2281
+ df = features.join(features_xref, on="name==feature_name", how='left')
2282
+ self.__df_container[obj_type] = df.select(features.columns+["group_name"])
2283
+ # For entity, join with xref table.
2284
+ elif obj_type == "entity" or obj_type == "entity_staging":
2285
+ ent_df = DataFrame(in_schema(self.__repo, self.__table_names[obj_type]))
2286
+ xref_df = DataFrame(in_schema(self.__repo, self.__table_names["{}_xref".format(obj_type)])).select(
2287
+ ['entity_name', 'entity_column'])
2288
+ df = ent_df.join(xref_df, on="name==entity_name", how="inner")
2289
+ self.__df_container[obj_type] = df.select(ent_df.columns+["entity_column"])
2290
+ else:
2291
+ self.__df_container[obj_type] = DataFrame(in_schema(self.__repo, self.__table_names[obj_type]))
2292
+
2293
+ return self.__df_container[obj_type]
2294
+
2295
+ def version(self):
2296
+ """
2297
+ DESCRIPTION:
2298
+ Get the FeatureStore version.
2299
+
2300
+ PARAMETERS:
2301
+ None
2302
+
2303
+ RETURNS:
2304
+ str
2305
+
2306
+ RAISES:
2307
+ None
2308
+
2309
+ EXAMPLES:
2310
+ # Example 1: Get the version of FeatureStore version for
2311
+ # the repo 'vfs_v1'.
2312
+ >>> from teradataml import FeatureStore
2313
+ >>> fs = FeatureStore('vfs_v1')
2314
+ >>> fs.version()
2315
+ '1.0.0'
2316
+ >>>
2317
+ """
2318
+ return self.__version