teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic. Click here for more details.

Files changed (88) hide show
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +196 -2
  3. teradataml/__init__.py +4 -0
  4. teradataml/_version.py +1 -1
  5. teradataml/analytics/analytic_function_executor.py +79 -4
  6. teradataml/analytics/json_parser/metadata.py +12 -3
  7. teradataml/analytics/json_parser/utils.py +7 -2
  8. teradataml/analytics/sqle/__init__.py +1 -0
  9. teradataml/analytics/table_operator/__init__.py +1 -1
  10. teradataml/analytics/uaf/__init__.py +1 -1
  11. teradataml/analytics/utils.py +4 -0
  12. teradataml/automl/data_preparation.py +3 -2
  13. teradataml/automl/feature_engineering.py +15 -7
  14. teradataml/automl/model_training.py +39 -33
  15. teradataml/common/__init__.py +2 -1
  16. teradataml/common/constants.py +35 -0
  17. teradataml/common/garbagecollector.py +2 -1
  18. teradataml/common/messagecodes.py +8 -2
  19. teradataml/common/messages.py +3 -1
  20. teradataml/common/sqlbundle.py +25 -3
  21. teradataml/common/utils.py +134 -9
  22. teradataml/context/context.py +20 -10
  23. teradataml/data/SQL_Fundamentals.pdf +0 -0
  24. teradataml/data/dataframe_example.json +18 -2
  25. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
  26. teradataml/data/docs/sqle/docs_17_20/Shap.py +7 -1
  27. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
  28. teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
  29. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  30. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  31. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
  32. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
  33. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  34. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  35. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  36. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  37. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  38. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
  39. teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
  40. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
  41. teradataml/data/medical_readings.csv +101 -0
  42. teradataml/data/patient_profile.csv +101 -0
  43. teradataml/data/scripts/lightgbm/dataset.template +157 -0
  44. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
  45. teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
  46. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
  47. teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
  48. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
  49. teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
  50. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
  51. teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
  52. teradataml/data/target_udt_data.csv +8 -0
  53. teradataml/data/templates/open_source_ml.json +3 -2
  54. teradataml/data/vectordistance_example.json +4 -0
  55. teradataml/dataframe/dataframe.py +543 -175
  56. teradataml/dataframe/functions.py +553 -25
  57. teradataml/dataframe/sql.py +184 -15
  58. teradataml/dbutils/dbutils.py +556 -18
  59. teradataml/dbutils/filemgr.py +48 -1
  60. teradataml/lib/aed_0_1.dll +0 -0
  61. teradataml/opensource/__init__.py +1 -1
  62. teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
  63. teradataml/opensource/_lightgbm.py +950 -0
  64. teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
  65. teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
  66. teradataml/opensource/sklearn/__init__.py +0 -1
  67. teradataml/opensource/sklearn/_sklearn_wrapper.py +798 -438
  68. teradataml/options/__init__.py +7 -23
  69. teradataml/options/configure.py +29 -3
  70. teradataml/scriptmgmt/UserEnv.py +3 -3
  71. teradataml/scriptmgmt/lls_utils.py +74 -21
  72. teradataml/store/__init__.py +13 -0
  73. teradataml/store/feature_store/__init__.py +0 -0
  74. teradataml/store/feature_store/constants.py +291 -0
  75. teradataml/store/feature_store/feature_store.py +2223 -0
  76. teradataml/store/feature_store/models.py +1505 -0
  77. teradataml/store/vector_store/__init__.py +1586 -0
  78. teradataml/table_operators/query_generator.py +3 -0
  79. teradataml/table_operators/table_operator_query_generator.py +3 -1
  80. teradataml/table_operators/table_operator_util.py +37 -38
  81. teradataml/table_operators/templates/dataframe_register.template +69 -0
  82. teradataml/utils/dtypes.py +4 -2
  83. teradataml/utils/validators.py +33 -1
  84. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +200 -5
  85. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +88 -65
  86. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
  87. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
  88. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
@@ -0,0 +1,2223 @@
1
+ """
2
+ Copyright (c) 2024 by Teradata Corporation. All rights reserved.
3
+ TERADATA CORPORATION CONFIDENTIAL AND TRADE SECRET
4
+
5
+ Primary Owner: pradeep.garre@teradata.com
6
+ Secondary Owner: adithya.avvaru@teradata.com
7
+
8
+ This file implements the core framework that allows user to use Teradata Enterprise Feature Store.
9
+ """
10
+
11
+ from sqlalchemy import literal_column
12
+ from teradataml.context.context import get_connection
13
+ from teradataml.common.constants import SQLConstants
14
+ from teradataml.common.exceptions import TeradataMlException
15
+ from teradataml.common.messages import Messages
16
+ from teradataml.common.messagecodes import MessageCodes
17
+ from teradataml.dataframe.sql import _SQLColumnExpression as Col
18
+ from teradataml.dbutils.dbutils import _create_database, _create_table, db_drop_table, execute_sql, Grant, Revoke, _update_data, _delete_data, db_transaction
19
+ from teradataml.store.feature_store.constants import *
20
+ from teradataml.store.feature_store.models import *
21
+ from teradataml.common.sqlbundle import SQLBundle
22
+ from teradataml.utils.validators import _Validators
23
+
24
+
25
+ class FeatureStore:
26
+ """Class for FeatureStore."""
27
+
28
+ def __init__(self, repo):
29
+ """
30
+ DESCRIPTION:
31
+ Method to create FeatureStore in teradataml.
32
+
33
+ PARAMETERS:
34
+ repo:
35
+ Required Argument.
36
+ Specifies the repository name.
37
+ Types: str.
38
+
39
+ RETURNS:
40
+ Object of FeatureStore.
41
+
42
+ RAISES:
43
+ None
44
+
45
+ EXAMPLES:
46
+ >>> # Create FeatureStore for repository 'vfs_v1'.
47
+ >>> from teradataml import FeatureStore
48
+ >>> fs = FeatureStore('vfs_v1')
49
+ >>> fs
50
+ FeatureStore(vfs_v1)-v1.0
51
+ >>>
52
+ """
53
+ argument_validation_params = []
54
+ argument_validation_params.append(["repo", repo, False, (str), True])
55
+
56
+ # Validate argument types
57
+ _Validators._validate_function_arguments(argument_validation_params)
58
+ # Do not validate the existence of repo as it consumes a network call.
59
+ self.__repo = repo
60
+ self.__version = ""
61
+
62
+ # Declare SQLBundle to use it further.
63
+ self.__sql_bundle = SQLBundle()
64
+
65
+ # Store all the DF's here so no need to create these every time.
66
+ self.__df_container = {}
67
+
68
+ # Store the table names here. Then use this wherever required.
69
+ self.__table_names = {name: UtilFuncs._get_qualified_table_name(self.__repo, table_name)
70
+ for name, table_name in EFS_TABLES.items()}
71
+
72
+ # Declare getter's for getting the corresponding DataFrame's.
73
+ self.__get_features_df = lambda : self.__get_obj_df("feature")
74
+ self.__get_archived_features_df = lambda : self.__get_obj_df("feature_staging")
75
+ self.__get_group_features_df = lambda : self.__get_obj_df("group_features")
76
+ self.__get_archived_group_features_df = lambda : self.__get_obj_df("group_features_staging")
77
+ self.__get_feature_group_df = lambda : self.__get_obj_df("feature_group")
78
+ self.__get_archived_feature_group_df = lambda : self.__get_obj_df("feature_group_staging")
79
+ self.__get_entity_df = lambda : self.__get_obj_df("entity")
80
+ self.__get_archived_entity_df = lambda : self.__get_obj_df("entity_staging")
81
+ self.__get_data_source_df = lambda : self.__get_obj_df("data_source")
82
+ self.__get_archived_data_source_df = lambda : self.__get_obj_df("data_source_staging")
83
+
84
+ self.__good_status = "Good"
85
+ self.__bad_status = "Bad"
86
+ self.__repaired_status = "Repaired"
87
+
88
+ @property
89
+ def repo(self):
90
+ """
91
+ DESCRIPTION:
92
+ Get the repository.
93
+
94
+ PARAMETERS:
95
+ None
96
+
97
+ RETURNS:
98
+ str
99
+
100
+ RAISES:
101
+ None
102
+
103
+ EXAMPLES:
104
+ >>> from teradataml import FeatureStore
105
+ >>> fs = FeatureStore('vfs_v1')
106
+ >>> fs.repo
107
+ vfs_v1
108
+ >>>
109
+ """
110
+ return self.__repo
111
+
112
+ @repo.setter
113
+ def repo(self, value):
114
+ """
115
+ DESCRIPTION:
116
+ Set the repository.
117
+
118
+ PARAMETERS:
119
+ value:
120
+ Required Argument.
121
+ Specifies the repository name.
122
+ Types: str.
123
+
124
+ RETURNS:
125
+ None.
126
+
127
+ RAISES:
128
+ None
129
+
130
+ EXAMPLES:
131
+ # Example 1: Create a FeatureStore for repository 'abc' and
132
+ # then change the repository to 'xyz'.
133
+ >>> from teradataml import FeatureStore
134
+ >>> fs = FeatureStore('abc')
135
+ >>> fs.repo = 'xyz'
136
+ >>>
137
+ """
138
+ argument_validation_params = []
139
+ argument_validation_params.append(["value", value, False, (str), True])
140
+
141
+ # Validate argument types
142
+ _Validators._validate_function_arguments(argument_validation_params)
143
+ # remove all entries from container so they will be automatically
144
+ # point to new repo for subsequent API's.
145
+ self.__df_container.clear()
146
+ self.__version = None
147
+
148
+ # Set the repo value.
149
+ self.__repo = value
150
+
151
+ def __repr__(self):
152
+ """
153
+ DESCRIPTION:
154
+ String representation for FeatureStore object.
155
+
156
+ PARAMETERS:
157
+ None
158
+
159
+ RETURNS:
160
+ str
161
+
162
+ RAISES:
163
+ None
164
+ """
165
+ s = "VantageFeatureStore({})".format(self.__repo)
166
+ try:
167
+ version = "-v{}".format(self.__get_version())
168
+ except Exception as e:
169
+ version = ""
170
+ return "{}{}".format(s, version)
171
+
172
+ def __get_version(self):
173
+ """
174
+ DESCRIPTION:
175
+ Internal method to get the FeatureStore version.
176
+
177
+ PARAMETERS:
178
+ None
179
+
180
+ RETURNS:
181
+ str
182
+
183
+ RAISES:
184
+ None
185
+ """
186
+ if not self.__version:
187
+ sql = "SELECT version FROM {}.{}".format(self.__repo, EFS_VERSION_SPEC["table_name"])
188
+ self.__version = next(execute_sql(sql))[0]
189
+ return self.__version
190
+
191
+ @staticmethod
192
+ def list_repos() -> DataFrame:
193
+ """
194
+ DESCRIPTION:
195
+ Function to list down the repositories.
196
+
197
+ PARAMETERS:
198
+ None
199
+
200
+ RETURNS:
201
+ teradataml DataFrame
202
+
203
+ RAISES:
204
+ None
205
+
206
+ EXAMPLES:
207
+ # List down all the FeatureStore repositories.
208
+ >>> FeatureStore.list_repos()
209
+ repos
210
+ 0 vfs_v1
211
+ >>>
212
+ """
213
+ return DataFrame.from_query("select distinct DataBaseName as repos from dbc.tablesV where TableName='{}'".format(
214
+ EFS_VERSION_SPEC["table_name"]))
215
+
216
+ def setup(self, perm_size='10e9', spool_size='10e8'):
217
+ """
218
+ DESCRIPTION:
219
+ Function to setup all the required objects in Vantage for the specified
220
+ repository.
221
+ Note:
222
+ The function checks whether repository exists or not. If not exists,
223
+ it first creates the repository and then creates the corresponding tables.
224
+ Hence make sure the user with which it is connected to Vantage
225
+ has corresponding access rights for creating DataBase and creating
226
+ tables in the corresponding database.
227
+
228
+ PARAMETERS:
229
+ perm_size:
230
+ Optional Argument.
231
+ Specifies the number of bytes to allocate to FeatureStore "repo"
232
+ for permanent space.
233
+ Note:
234
+ Exponential notation can also be used.
235
+ Default Value: 10e9
236
+ Types: str or int
237
+
238
+ spool_size:
239
+ Optional Argument.
240
+ Specifies the number of bytes to allocate to FeatureStore "repo"
241
+ for spool space.
242
+ Note:
243
+ Exponential notation can also be used.
244
+ Default Value: 10e8
245
+ Types: str or int
246
+
247
+ RETURNS:
248
+ bool
249
+
250
+ RAISES:
251
+ TeradataMlException
252
+
253
+ EXAMPLES:
254
+ # Setup FeatureStore for repo 'vfs_v1'.
255
+ >>> from teradataml import FeatureStore
256
+ >>> fs = FeatureStore("vfs_v1")
257
+ >>> fs.setup()
258
+ True
259
+ >>>
260
+ """
261
+
262
+ repo_exists = get_connection().dialect._get_database_names(
263
+ get_connection(), self.__repo)
264
+
265
+ # If repo does not exist, then create it.
266
+ if not repo_exists:
267
+ _create_database(self.__repo, perm_size, spool_size)
268
+
269
+ # Check whether version table exists or not. If it exists, assume all
270
+ # tables are available.
271
+ all_tables_exist = get_connection().dialect.has_table(
272
+ get_connection(), EFS_VERSION_SPEC['table_name'], schema=self.__repo)
273
+
274
+ if not all_tables_exist:
275
+ # Create the tables.
276
+ table_specs = [EFS_FEATURES_SPEC,
277
+ EFS_DATA_SOURCE_SPEC,
278
+ EFS_ENTITY_SPEC,
279
+ EFS_ENTITY_XREF_SPEC,
280
+ EFS_FEATURE_GROUP_SPEC,
281
+ EFS_GROUP_FEATURES_SPEC,
282
+ EFS_VERSION_SPEC]
283
+
284
+ staging_table_specs = [
285
+ EFS_FEATURES_STAGING_SPEC,
286
+ EFS_DATA_SOURCE_STAGING_SPEC,
287
+ EFS_ENTITY_STAGING_SPEC,
288
+ EFS_ENTITY_XREF_STAGING_SPEC,
289
+ EFS_GROUP_FEATURES_STAGING_SPEC,
290
+ EFS_FEATURE_GROUP_STAGING_SPEC
291
+ ]
292
+
293
+ triggers_specs = [
294
+ EFS_FEATURES_TRG,
295
+ EFS_GROUP_FEATURES_TRG,
296
+ EFS_FEATURE_GROUP_TRG,
297
+ EFS_DATA_SOURCE_TRG,
298
+ EFS_ENTITY_TRG,
299
+ EFS_ENTITY_XREF_TRG
300
+ ]
301
+
302
+ for table_spec in table_specs + staging_table_specs:
303
+ params_ = {"table_name": table_spec["table_name"],
304
+ "columns": table_spec["columns"],
305
+ "primary_index": table_spec.get("primary_index"),
306
+ "unique": True if table_spec.get("primary_index") else False,
307
+ "schema_name": self.__repo,
308
+ "set_table": False
309
+ }
310
+ if "foreign_keys" in table_spec:
311
+ params_["foreign_key_constraint"] = table_spec.get("foreign_keys")
312
+
313
+ _create_table(**params_)
314
+
315
+ for trigger_spec in triggers_specs:
316
+ execute_sql(trigger_spec.format(schema_name=self.__repo))
317
+
318
+ # After the setup is done, populate the version.
319
+ insert_model = "insert into {}.{} values (?, ?);".format(self.__repo, EFS_VERSION_SPEC["table_name"])
320
+ execute_sql(insert_model, (EFS_VERSION, datetime.datetime.now()))
321
+
322
+ if repo_exists and all_tables_exist:
323
+ print("EFS is already setup for the repo {}.".format(self.__repo))
324
+
325
+ @property
326
+ def grant(self):
327
+ """
328
+ DESCRIPTION:
329
+ Grants access on FeatureStore.
330
+ Note:
331
+ One must have admin access to grant access.
332
+
333
+ PARAMETERS:
334
+ None
335
+
336
+ RETURNS:
337
+ bool
338
+
339
+ RAISES:
340
+ OperationalError
341
+
342
+ EXAMPLES:
343
+ >>> from teradataml import FeatureStore
344
+ # Create FeatureStore for repo 'vfs_v1'.
345
+ >>> fs = FeatureStore("vfs_v1")
346
+ # Setup FeatureStore for this repository.
347
+ >>> fs.setup()
348
+ True
349
+
350
+ # Example 1: Grant read access on FeatureStore to user 'BoB'.
351
+ >>> fs.grant.read('BoB')
352
+ True
353
+
354
+ # Example 2: Grant write access on FeatureStore to user 'BoB'.
355
+ >>> fs.grant.write('BoB')
356
+ True
357
+
358
+ # Example 3: Grant read and write access on FeatureStore to user 'BoB'.
359
+ >>> fs.grant.read_write('BoB')
360
+ True
361
+
362
+ """
363
+ table_names = {name: UtilFuncs._get_qualified_table_name(self.__repo, table_name)
364
+ for name, table_name in EFS_TABLES.items()}
365
+ return Grant(list(table_names.values()))
366
+
367
+ @property
368
+ def revoke(self):
369
+ """
370
+ DESCRIPTION:
371
+ Revokes access on FeatureStore.
372
+ Note:
373
+ One must have admin access to revoke access.
374
+
375
+ PARAMETERS:
376
+ None
377
+
378
+ RETURNS:
379
+ bool
380
+
381
+ RAISES:
382
+ OperationalError
383
+
384
+ EXAMPLES:
385
+ >>> from teradataml import FeatureStore
386
+ # Create FeatureStore for repo 'vfs_v1'.
387
+ >>> fs = FeatureStore("vfs_v1")
388
+ # Setup FeatureStore for this repository.
389
+ >>> fs.setup()
390
+ True
391
+
392
+ # Example 1: Revoke read access on FeatureStore from user 'BoB'.
393
+ >>> fs.revoke.read('BoB')
394
+ True
395
+
396
+ # Example 2: Revoke write access on FeatureStore from user 'BoB'.
397
+ >>> fs.revoke.write('BoB')
398
+ True
399
+
400
+ # Example 3: Revoke read and write access on FeatureStore from user 'BoB'.
401
+ >>> fs.revoke.read_write('BoB')
402
+ True
403
+ """
404
+ table_names = {name: UtilFuncs._get_qualified_table_name(self.__repo, table_name)
405
+ for name, table_name in EFS_TABLES.items()}
406
+ return Revoke(list(table_names.values()))
407
+
408
+ def repair(self):
409
+ """
410
+ DESCRIPTION:
411
+ Repairs the existing repo.
412
+ Notes:
413
+ * The method checks for the corresponding missing database objects which are
414
+ required for FeatureStore. If any of the database object is not available,
415
+ then it tries to create the object.
416
+ * The method repairs only the underlying tables and not data inside the
417
+ corresponding table.
418
+
419
+ PARAMETERS:
420
+ None
421
+
422
+ RETURNS:
423
+ bool
424
+
425
+ RAISES:
426
+ TeradataMlException
427
+
428
+ EXAMPLES:
429
+ # Repair FeatureStore repo 'vfs_v1'.
430
+ >>> from teradataml import FeatureStore
431
+ >>> fs = FeatureStore("vfs_v1")
432
+ >>> fs.repair()
433
+ True
434
+ >>>
435
+ """
436
+
437
+ # Repair Features, Entities and DataSources first. Then FeatureGroup and then Group Features.
438
+ group_features_ = [EFS_GROUP_FEATURES_STAGING_SPEC, EFS_GROUP_FEATURES_SPEC, EFS_GROUP_FEATURES_TRG, "GroupFeatures"]
439
+ feature_group_ = [EFS_FEATURE_GROUP_STAGING_SPEC, EFS_FEATURE_GROUP_SPEC, EFS_FEATURE_GROUP_TRG, "FeatureGroup"]
440
+ featuers_ = [EFS_FEATURES_STAGING_SPEC, EFS_FEATURES_SPEC, EFS_FEATURES_TRG, "Feature"]
441
+ entities_ = [EFS_ENTITY_STAGING_SPEC, EFS_ENTITY_SPEC, EFS_ENTITY_TRG, "Entity"]
442
+ entities_xref_ = [EFS_ENTITY_XREF_STAGING_SPEC, EFS_ENTITY_XREF_SPEC, EFS_ENTITY_XREF_TRG, "EntityXref"]
443
+ data_sources_ = [EFS_DATA_SOURCE_STAGING_SPEC, EFS_DATA_SOURCE_SPEC, EFS_DATA_SOURCE_TRG, "DataSource"]
444
+
445
+
446
+ for staging_table_, table_, trigger, obj_name in (group_features_, feature_group_, featuers_, entities_, entities_xref_, data_sources_):
447
+ status = []
448
+ print("Repairing objects related to {}.".format(obj_name))
449
+
450
+ status.append(self.__try_create_table(staging_table_))
451
+ status.append(self.__try_create_table(table_))
452
+ status.append(self.__try_create_trigger(trigger, "{}_trg".format(table_["table_name"])))
453
+
454
+ # Let user know about status.
455
+ # If any of the status is Bad, then repair is failed.
456
+ # Else, if any of the status is Repaired, then successfully repaired.
457
+ # Else no need to repair the object.
458
+ if self.__bad_status in status:
459
+ print("Unable to repair objects related to {}.".format(obj_name))
460
+ else:
461
+ if self.__repaired_status in status:
462
+ print("Successfully repaired objects related to {}.".format(obj_name))
463
+ else:
464
+ print("{} objects are good and do not need any repair.".format(obj_name))
465
+
466
+ # Repair the version table.
467
+ status = self.__try_create_table(EFS_VERSION_SPEC)
468
+ if status == self.__repaired_status:
469
+ # After the setup is done, populate the version.
470
+ insert_model = "insert into {}.{} values (?, ?);".format(self.__repo, EFS_VERSION_SPEC["table_name"])
471
+ execute_sql(insert_model, (EFS_VERSION, datetime.datetime.now()))
472
+
473
+ return True
474
+
475
+ def __try_create_table(self, table_spec):
476
+ """
477
+ DESCRIPTION:
478
+ Internal function to create a table from table spec.
479
+
480
+ PARAMETERS:
481
+ table_spec:
482
+ Required Argument.
483
+ Specifies the spec for the corresponding table.
484
+ Types: dict
485
+
486
+ RETURNS:
487
+ str
488
+ Note:
489
+ Method can return three different values of strings.
490
+ * Good - When table to create already exists.
491
+ * Repaired - When table is created.
492
+ * Bad - When the table does not exist and the method is unable to create it.
493
+
494
+ RAISES:
495
+ None
496
+
497
+ EXAMPLES:
498
+ self.__try_create_table(EFS_VERSION_SPEC)
499
+ """
500
+ try:
501
+ _create_table(table_spec["table_name"],
502
+ columns=table_spec["columns"],
503
+ primary_index=table_spec.get("primary_index"),
504
+ unique=True if table_spec.get("primary_index") else False,
505
+ schema_name=self.__repo,
506
+ set_table=False)
507
+ return self.__repaired_status
508
+ except Exception as e:
509
+ if "Table '{}' already exists".format(table_spec["table_name"]) in str(e):
510
+ return self.__good_status
511
+ else:
512
+ print(str(e))
513
+ return self.__bad_status
514
+
515
+ def __try_create_trigger(self, trigger_spec, trigger_name):
516
+ """
517
+ DESCRIPTION:
518
+ Internal function to create trigger.
519
+
520
+ PARAMETERS:
521
+ trigger_spec:
522
+ Required Argument.
523
+ Specifies the spec for the corresponding trigger.
524
+ Types: str
525
+
526
+ trigger_name:
527
+ Required Argument.
528
+ Specifies the name of the trigger to create.
529
+ Types: str
530
+
531
+ RETURNS:
532
+ str
533
+ Note:
534
+ Method can return three different values of strings.
535
+ * Good - When trigger to create already exists.
536
+ * Repaired - When trigger is created.
537
+ * Bad - When the trigger does not exist and the method is unable to create it.
538
+
539
+ RAISES:
540
+ None
541
+
542
+ EXAMPLES:
543
+ self.__try_create_trigger(EFS_FEATURE_TRIGGER_SPEC)
544
+ """
545
+ try:
546
+ execute_sql(trigger_spec.format(schema_name=self.__repo))
547
+ return self.__repaired_status
548
+ except Exception as e:
549
+ if "Trigger '{}' already exists".format(trigger_name) in str(e):
550
+ return self.__good_status
551
+ else:
552
+ print("Unable to create trigger '{}'. Error - {}".format(trigger_name, str(e)))
553
+ return self.__bad_status
554
+
555
+ def list_features(self, archived=False) -> DataFrame:
556
+ """
557
+ DESCRIPTION:
558
+ List all the features.
559
+
560
+ PARAMETERS:
561
+ archived:
562
+ Optional Argument.
563
+ Specifies whether to list effective features or archived features.
564
+ When set to False, effective features in FeatureStore are listed,
565
+ otherwise, archived features are listed.
566
+ Default Value: False
567
+ Types: bool
568
+
569
+ RETURNS:
570
+ teradataml DataFrame
571
+
572
+ RAISES:
573
+ None
574
+
575
+ EXAMPLES:
576
+ >>> from teradataml import DataFrame, FeatureStore, load_example_data
577
+ >>> load_example_data('dataframe', 'sales')
578
+ # Create FeatureStore for repo 'vfs_v1'.
579
+ >>> fs = FeatureStore("vfs_v1")
580
+ # Create teradataml DataFrame.
581
+ >>> df = DataFrame("sales")
582
+ # Create a FeatureGroup from teradataml DataFrame.
583
+ >>> fg = FeatureGroup.from_DataFrame(name='sales',
584
+ ... entity_columns='accounts',
585
+ ... df=df,
586
+ ... timestamp_col_name='datetime')
587
+ # Apply the FeatureGroup to FeatureStore.
588
+ >>> fs.apply(fg)
589
+ True
590
+
591
+ # Example 1: List all the effective Features in the repo 'vfs_v1'.
592
+ >>> fs.list_features()
593
+ column_name description creation_time modified_time tags data_type feature_type status group_name
594
+ name
595
+ Mar Mar None 2024-09-30 11:21:43.314118 None None BIGINT CONTINUOUS ACTIVE sales
596
+ Jan Jan None 2024-09-30 11:21:42.655343 None None BIGINT CONTINUOUS ACTIVE sales
597
+ Apr Apr None 2024-09-30 11:21:44.143402 None None BIGINT CONTINUOUS ACTIVE sales
598
+ Feb Feb None 2024-09-30 11:21:41.542627 None None FLOAT CONTINUOUS ACTIVE sales
599
+ >>>
600
+
601
+ # Example 2: List all the archived Features in the repo 'vfs_v1'.
602
+ # Note: Feature can only be archived when it is not associated with any Group.
603
+ # Let's remove Feature 'Feb' from FeatureGroup.
604
+ >>> fg.remove(fs.get_feature('Feb'))
605
+ True
606
+ # Apply the modified FeatureGroup to FeatureStore.
607
+ >>> fs.apply(fg)
608
+ True
609
+ # Archive Feature 'Feb'.
610
+ >>> fs.archive_feature('Feb')
611
+ Feature 'Feb' is archived.
612
+ True
613
+
614
+ # List all the archived Features in the repo 'vfs_v1'.
615
+ >>> fs.list_features(archived=True)
616
+ name column_name description creation_time modified_time tags data_type feature_type status archived_time group_name
617
+ 0 Feb Feb None 2024-09-30 11:21:41.542627 None None FLOAT CONTINUOUS ACTIVE 2024-09-30 11:30:49.160000 sales
618
+ >>>
619
+ """
620
+ return self.__get_archived_features_df() if archived else self.__get_features_df()
621
+
622
+ def list_entities(self, archived=False) -> DataFrame:
623
+ """
624
+ DESCRIPTION:
625
+ List all the entities.
626
+
627
+ PARAMETERS:
628
+ archived:
629
+ Optional Argument.
630
+ Specifies whether to list effective entities or archived entities.
631
+ When set to False, effective entities in FeatureStore are listed,
632
+ otherwise, archived entities are listed.
633
+ Default Value: False
634
+ Types: bool
635
+
636
+ RETURNS:
637
+ teradataml DataFrame
638
+
639
+ RAISES:
640
+ None
641
+
642
+ EXAMPLES:
643
+ >>> from teradataml import DataFrame, FeatureStore, load_example_data
644
+ >>> load_example_data('dataframe', 'sales')
645
+ # Create FeatureStore for repo 'vfs_v1'.
646
+ >>> fs = FeatureStore("vfs_v1")
647
+ # Create teradataml DataFrame.
648
+ >>> df = DataFrame("sales")
649
+ # Create a FeatureGroup from teradataml DataFrame.
650
+ >>> fg = FeatureGroup.from_DataFrame(name='sales',
651
+ ... entity_columns='accounts',
652
+ ... df=df,
653
+ ... timestamp_col_name='datetime')
654
+ # Apply the FeatureGroup to FeatureStore.
655
+ >>> fs.apply(fg)
656
+ True
657
+
658
+ # Example 1: List all the effective Entities in the repo 'vfs_v1'.
659
+ >>> fs.list_entities()
660
+ description
661
+ name entity_column
662
+ sales accounts None
663
+ >>>
664
+
665
+ # Example 2: List all the archived Entities in the repo 'vfs_v1'.
666
+ # Note: Entity cannot be archived if it is a part of FeatureGroup.
667
+ # First create another Entity, and update FeatureGroup with
668
+ # other Entity. Then archive Entity 'sales'.
669
+ >>> entity = Entity('store_sales', columns=df.accounts)
670
+ # Update new entity to FeatureGroup.
671
+ >>> fg.apply(entity)
672
+ # Update FeatureGroup to FeatureStore. This will update Entity
673
+ # from 'sales' to 'store_sales' for FeatureGroup 'sales'.
674
+ >>> fs.apply(fg)
675
+ True
676
+ # Let's archive Entity 'sales' since it is not part of any FeatureGroup.
677
+ >>> fs.archive_entity('sales')
678
+ Entity 'sales' is archived.
679
+ True
680
+ >>>
681
+
682
+ # List the archived entities.
683
+ >>> fs.list_entities(archived=True)
684
+ name description creation_time modified_time archived_time entity_column
685
+ 0 sales None 2024-10-18 05:41:36.932856 None 2024-10-18 05:50:00.930000 accounts
686
+ >>>
687
+ """
688
+ return self.__get_archived_entity_df() if archived else self.__get_entity_df()
689
+
690
+ def list_data_sources(self, archived=False) -> DataFrame:
691
+ """
692
+ DESCRIPTION:
693
+ List all the Data Sources.
694
+
695
+ PARAMETERS:
696
+ archived:
697
+ Optional Argument.
698
+ Specifies whether to list effective data sources or archived data sources.
699
+ When set to False, effective data sources in FeatureStore are listed,
700
+ otherwise, archived data sources are listed.
701
+ Default Value: False
702
+ Types: bool
703
+
704
+ RETURNS:
705
+ teradataml DataFrame
706
+
707
+ RAISES:
708
+ None
709
+
710
+ EXAMPLES:
711
+ >>> from teradataml import DataSource, FeatureStore, load_example_data
712
+ >>> load_example_data("dataframe", "admissions_train")
713
+ # Create teradataml DataFrame.
714
+ >>> admissions=DataFrame("admissions_train")
715
+ # Create FeatureStore for repo 'vfs_v1'.
716
+ >>> fs = FeatureStore("vfs_v1")
717
+ # Create DataSource using teradataml DataFrame.
718
+ >>> ds = DataSource(name='admissions', source=admissions)
719
+ # Apply the DataSource to FeatureStore.
720
+ >>> fs.apply(ds)
721
+ True
722
+
723
+ # Example 1: List all the effective DataSources in the repo 'vfs_v1'.
724
+ >>> fs.list_data_sources()
725
+ description timestamp_col_name source
726
+ name
727
+ admissions None None select * from "admissions_train"
728
+ >>>
729
+
730
+ # Example 2: List all the archived DataSources in the repo 'vfs_v1'.
731
+ # Let's first archive the DataSource.
732
+ >>> fs.archive_data_source('admissions')
733
+ DataSource 'admissions' is archived.
734
+ True
735
+ # List archived DataSources.
736
+ >>> fs.list_data_sources(archived=True)
737
+ description timestamp_col_name source archived_time
738
+ name
739
+ admissions None None select * from "admissions_train" 2024-09-30 12:05:39.220000
740
+ >>>
741
+ """
742
+ return self.__get_archived_data_source_df() if archived else self.__get_data_source_df()
743
+
744
+ def list_feature_groups(self, archived=False) -> DataFrame:
745
+ """
746
+ DESCRIPTION:
747
+ List all the FeatureGroups.
748
+
749
+ PARAMETERS:
750
+ archived:
751
+ Optional Argument.
752
+ Specifies whether to list effective feature groups or archived feature groups.
753
+ When set to False, effective feature groups in FeatureStore are listed,
754
+ otherwise, archived feature groups are listed.
755
+ Default Value: False
756
+ Types: bool
757
+
758
+ RETURNS:
759
+ teradataml DataFrame
760
+
761
+ RAISES:
762
+ None
763
+
764
+ EXAMPLES:
765
+ >>> from teradataml import FeatureGroup, FeatureStore, load_example_data
766
+ >>> load_example_data("dataframe", "admissions_train")
767
+ # Create teradataml DataFrame.
768
+ >>> admissions=DataFrame("admissions_train")
769
+ # Create FeatureStore for repo 'vfs_v1'.
770
+ >>> fs = FeatureStore("vfs_v1")
771
+ # Create a FeatureGroup from DataFrame.
772
+ >>> fg = FeatureGroup.from_DataFrame("admissions", df=admissions, entity_columns='id')
773
+ # Apply FeatureGroup to FeatureStore.
774
+ >>> fs.apply(fg)
775
+ True
776
+
777
+ # Example 1: List all the effective FeatureGroups in the repo 'vfs_v1'.
778
+ >>> fs.list_feature_groups()
779
+ description data_source_name entity_name
780
+ name
781
+ admissions None admissions admissions
782
+ >>>
783
+
784
+ # Example 2: List all the archived FeatureGroups in the repo 'vfs_v1'.
785
+ # Let's first archive the FeatureGroup.
786
+ >>> fs.archive_feature_group("admissions")
787
+ True
788
+ >>>
789
+ # List archived FeatureGroups.
790
+ >>> fs.list_feature_groups(archived=True)
791
+ name description data_source_name entity_name archived_time
792
+ 0 admissions None admissions admissions 2024-09-30 12:05:39.220000
793
+ >>>
794
+ """
795
+ return self.__get_archived_feature_group_df() if archived else self.__get_feature_group_df()
796
+
797
+ def get_feature(self, name):
798
+ """
799
+ DESCRIPTION:
800
+ Retrieve the feature.
801
+
802
+ PARAMETERS:
803
+ name:
804
+ Required Argument.
805
+ Specifies the name of the feature to get.
806
+ Types: str
807
+
808
+ RETURNS:
809
+ Feature.
810
+
811
+ RAISES:
812
+ TeradataMlException
813
+
814
+ EXAMPLES:
815
+ >>> from teradataml import DataFrame, FeatureStore, load_example_data
816
+ # Load the sales data to Vantage.
817
+ >>> load_example_data("dataframe", "sales")
818
+ # Create DataFrame on sales data.
819
+ >>> df = DataFrame("sales")
820
+ >>> df
821
+ Feb Jan Mar Apr datetime
822
+ accounts
823
+ Orange Inc 210.0 NaN NaN 250.0 04/01/2017
824
+ Jones LLC 200.0 150.0 140.0 180.0 04/01/2017
825
+ Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
826
+ Alpha Co 210.0 200.0 215.0 250.0 04/01/2017
827
+ Yellow Inc 90.0 NaN NaN NaN 04/01/2017
828
+ >>>
829
+ # Create Feature for column 'Mar' with name 'sales_mar'.
830
+ >>> feature = Feature('sales_mar', column=df.Mar)
831
+ # Apply the Feature to FeatureStore.
832
+ >>> fs = FeatureStore("vfs_v1")
833
+ >>> fs.apply(feature)
834
+ True
835
+
836
+ # Get the feature 'sales_mar' from repo 'vfs_v1'.
837
+ >>> feature = fs.get_feature('sales_mar')
838
+ >>> feature
839
+ Feature(name=sales_mar)
840
+ >>>
841
+ """
842
+ argument_validation_params = []
843
+ argument_validation_params.append(["name", name, False, (str), True])
844
+
845
+ # Validate argument types
846
+ _Validators._validate_function_arguments(argument_validation_params)
847
+
848
+ df = self.list_features()
849
+ df = df[df.name == name]
850
+
851
+ # Check if a feature with that name exists or not. If not, raise error.
852
+ if df.shape[0] == 0:
853
+ msg_code = MessageCodes.FUNC_EXECUTION_FAILED
854
+ error_msg = Messages.get_message(
855
+ msg_code, "get_feature()", "Feature with name '{}' does not exist.".format(name))
856
+ raise TeradataMlException(error_msg, msg_code)
857
+
858
+ return Feature._from_df(df)
859
+
860
+ def get_group_features(self, group_name):
861
+ """
862
+ DESCRIPTION:
863
+ Get the Features from the given feature group name.
864
+
865
+ PARAMETERS:
866
+ group_name:
867
+ Required Argument.
868
+ Specifies the name of the group the feature belongs to.
869
+ Types: str
870
+
871
+ RETURNS:
872
+ List of Feature objects.
873
+
874
+ RAISES:
875
+ TeradataMLException
876
+
877
+ EXAMPLES:
878
+ >>> from teradataml import DataFrame, FeatureStore, load_example_data
879
+ # Load the sales data to Vantage.
880
+ >>> load_example_data("dataframe", "sales")
881
+ # Create DataFrame on sales data.
882
+ >>> df = DataFrame("sales")
883
+ >>> df
884
+ >>> df
885
+ Feb Jan Mar Apr datetime
886
+ accounts
887
+ Orange Inc 210.0 NaN NaN 250.0 04/01/2017
888
+ Jones LLC 200.0 150.0 140.0 180.0 04/01/2017
889
+ Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
890
+ Alpha Co 210.0 200.0 215.0 250.0 04/01/2017
891
+ Yellow Inc 90.0 NaN NaN NaN 04/01/2017
892
+ >>>
893
+ # Create FeatureGroup with name 'sales' from DataFrame.
894
+ >>> fg = FeatureGroup.from_DataFrame(
895
+ ... name="sales", df=df, entity_columns="accounts", timestamp_col_name="datetime")
896
+ # Apply the FeatureGroup to FeatureStore.
897
+ >>> fs = FeatureStore("vfs_v1")
898
+ >>> fs.apply(fg)
899
+ True
900
+
901
+ # Get all the features belongs to the group 'sales' from repo 'vfs_v1'.
902
+ >>> features = fs.get_group_features('sales')
903
+ >>> features
904
+ [Feature(name=Jan), Feature(name=Feb), Feature(name=Apr), Feature(name=Mar)]
905
+ >>>
906
+ """
907
+ argument_validation_params = []
908
+ argument_validation_params.append(["group_name", group_name, False, (str), True])
909
+
910
+ # Validate argument types
911
+ _Validators._validate_function_arguments(argument_validation_params)
912
+
913
+ # Select active features.
914
+ features_df = self.__get_features_df()
915
+ features_df = features_df[((features_df.status != FeatureStatus.INACTIVE.name) & (features_df.group_name == group_name))]
916
+
917
+ # Check if a feature with that group name exists or not. If not, raise error.
918
+ if features_df.shape[0] == 0:
919
+ msg_code = MessageCodes.FUNC_EXECUTION_FAILED
920
+ error_msg = Messages.get_message(
921
+ msg_code, "get_group_features()", "No features found for group '{}'.".format(group_name))
922
+ raise TeradataMlException(error_msg, msg_code)
923
+
924
+ return Feature._from_df(features_df)
925
+
926
+ def get_feature_group(self, name):
927
+ """
928
+ DESCRIPTION:
929
+ Retrieve the FeatureGroup using name.
930
+
931
+ PARAMETERS:
932
+ name:
933
+ Required Argument.
934
+ Specifies the name of the feature group to be retrieved.
935
+ Types: str
936
+
937
+ RETURNS:
938
+ Object of FeatureGroup
939
+
940
+ RAISES:
941
+ TeradataMLException
942
+
943
+ EXAMPLES:
944
+ >>> from teradataml import DataFrame, FeatureStore, load_example_data
945
+ # Load the sales data to Vantage.
946
+ >>> load_example_data("dataframe", "sales")
947
+ # Create DataFrame on sales data.
948
+ >>> df = DataFrame("sales")
949
+ >>> df
950
+ Feb Jan Mar Apr datetime
951
+ accounts
952
+ Orange Inc 210.0 NaN NaN 250.0 04/01/2017
953
+ Jones LLC 200.0 150.0 140.0 180.0 04/01/2017
954
+ Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
955
+ Alpha Co 210.0 200.0 215.0 250.0 04/01/2017
956
+ Yellow Inc 90.0 NaN NaN NaN 04/01/2017
957
+ >>>
958
+ # Create FeatureGroup with name 'sales' from DataFrame.
959
+ >>> fg = FeatureGroup.from_DataFrame(
960
+ ... name="sales", df=df, entity_columns="accounts", timestamp_col_name="datetime")
961
+ # Apply the FeatureGroup to FeatureStore.
962
+ >>> fs = FeatureStore("vfs_v1")
963
+ >>> fs.apply(fg)
964
+ True
965
+
966
+ # Get FeatureGroup with group name 'sales' from repo 'vfs_v1'.
967
+ >>> fg = fs.get_feature_group('sales')
968
+ >>> fg
969
+ FeatureGroup(sales, features=[Feature(name=Jan), Feature(name=Feb), Feature(name=Apr), Feature(name=Mar)], entity=Entity(name=sales), data_source=DataSource(name=sales))
970
+ >>>
971
+ """
972
+ argument_validation_params = []
973
+ argument_validation_params.append(["name", name, False, (str), True])
974
+
975
+ # Validate argument types
976
+ _Validators._validate_function_arguments(argument_validation_params)
977
+
978
+ df = self.list_feature_groups()
979
+ df = df[df.name == name]
980
+
981
+ # Check if a feature with that name exists or not. If not, raise error.
982
+ if df.shape[0] == 0:
983
+ msg_code = MessageCodes.FUNC_EXECUTION_FAILED
984
+ error_msg = Messages.get_message(
985
+ msg_code, "get_feature_group()", "FeatureGroup with name '{}' does not exist.".format(name))
986
+ raise TeradataMlException(error_msg, msg_code)
987
+
988
+ return FeatureGroup._from_df(df,
989
+ self.__repo,
990
+ self.__get_features_df(),
991
+ self.__get_entity_df(),
992
+ self.__get_data_source_df()
993
+ )
994
+
995
+ def get_entity(self, name):
996
+ """
997
+ DESCRIPTION:
998
+ Get the entity from feature store.
999
+
1000
+ PARAMETERS:
1001
+ name:
1002
+ Required Argument.
1003
+ Specifies the name of the entity.
1004
+ Types: str
1005
+
1006
+ RETURNS:
1007
+ Object of Entity.
1008
+
1009
+ RAISES:
1010
+ None
1011
+
1012
+ EXAMPLES:
1013
+ >>> from teradataml import DataFrame, Entity, FeatureStore, load_example_data
1014
+ # Load the admissions data to Vantage.
1015
+ >>> load_example_data("dataframe", "admissions_train")
1016
+ # Create DataFrame on admissions data.
1017
+ >>> df = DataFrame("admissions_train")
1018
+ >>> df
1019
+ masters gpa stats programming admitted
1020
+ id
1021
+ 34 yes 3.85 Advanced Beginner 0
1022
+ 32 yes 3.46 Advanced Beginner 0
1023
+ 11 no 3.13 Advanced Advanced 1
1024
+ 40 yes 3.95 Novice Beginner 0
1025
+ 38 yes 2.65 Advanced Beginner 1
1026
+ 36 no 3.00 Advanced Novice 0
1027
+ 7 yes 2.33 Novice Novice 1
1028
+ 26 yes 3.57 Advanced Advanced 1
1029
+ 19 yes 1.98 Advanced Advanced 0
1030
+ 13 no 4.00 Advanced Novice 1
1031
+ >>>
1032
+ # Create Entity for column 'id' with name 'admissions_id'.
1033
+ >>> entity = Entity(name='admissions_id', description="Entity for admissions", columns=df.id)
1034
+ # Apply the Entity to FeatureStore 'vfs_v1'.
1035
+ >>> fs = FeatureStore('vfs_v1')
1036
+ >>> fs.apply(entity)
1037
+ True
1038
+ >>>
1039
+
1040
+ # Get the Entity 'admissions_id' from repo 'vfs_v1'
1041
+ >>> entity = fs.get_entity('admissions_id')
1042
+ >>> entity
1043
+ Entity(name=admissions_id)
1044
+ >>>
1045
+ """
1046
+ argument_validation_params = []
1047
+ argument_validation_params.append(["name", name, False, (str), True])
1048
+
1049
+ # Validate argument types
1050
+ _Validators._validate_function_arguments(argument_validation_params)
1051
+
1052
+ df = self.__get_entity_df()
1053
+ df = df[df.name==name]
1054
+
1055
+ # Check if entity with that name exists or not. If not, raise error.
1056
+ if df.shape[0] == 0:
1057
+ msg_code = MessageCodes.FUNC_EXECUTION_FAILED
1058
+ error_msg = Messages.get_message(
1059
+ msg_code, "get_entity()", "Entity with name '{}' does not exist.".format(name))
1060
+ raise TeradataMlException(error_msg, msg_code)
1061
+ return Entity._from_df(df)
1062
+
1063
+ def get_data_source(self, name):
1064
+ """
1065
+ DESCRIPTION:
1066
+ Get the data source from feature store.
1067
+
1068
+ PARAMETERS:
1069
+ name:
1070
+ Required Argument.
1071
+ Specifies the name of the data source.
1072
+ Types: str
1073
+
1074
+ RETURNS:
1075
+ Object of DataSource.
1076
+
1077
+ RAISES:
1078
+ TeradataMLException
1079
+
1080
+ EXAMPLES:
1081
+ >>> from teradataml import DataFrame, DataSource, FeatureStore, load_example_data
1082
+ # Load the admissions data to Vantage.
1083
+ >>> load_example_data("dataframe", "admissions_train")
1084
+ # Create DataFrame on admissions data.
1085
+ >>> df = DataFrame("admissions_train")
1086
+ >>> df
1087
+ masters gpa stats programming admitted
1088
+ id
1089
+ 34 yes 3.85 Advanced Beginner 0
1090
+ 32 yes 3.46 Advanced Beginner 0
1091
+ 11 no 3.13 Advanced Advanced 1
1092
+ 40 yes 3.95 Novice Beginner 0
1093
+ 38 yes 2.65 Advanced Beginner 1
1094
+ 36 no 3.00 Advanced Novice 0
1095
+ 7 yes 2.33 Novice Novice 1
1096
+ 26 yes 3.57 Advanced Advanced 1
1097
+ 19 yes 1.98 Advanced Advanced 0
1098
+ 13 no 4.00 Advanced Novice 1
1099
+ >>>
1100
+ # Create DataSource using DataFrame 'df' with name 'admissions'.
1101
+ >>> ds = DataSource('admissions', source=df)
1102
+ # Apply the DataSource to FeatureStore 'vfs_v1'.
1103
+ >>> fs = FeatureStore('vfs_v1')
1104
+ >>> fs.apply(ds)
1105
+ True
1106
+ >>>
1107
+
1108
+ # Get the DataSource 'admissions' from repo 'vfs_v1'
1109
+ >>> ds = fs.get_data_source('admissions')
1110
+ >>> ds
1111
+ DataSource(name=admissions)
1112
+ >>>
1113
+ """
1114
+ argument_validation_params = []
1115
+ argument_validation_params.append(["name", name, False, (str), True])
1116
+
1117
+ # Validate argument types
1118
+ _Validators._validate_function_arguments(argument_validation_params)
1119
+
1120
+ df = self.__get_data_source_df()
1121
+ df = df[df.name == name]
1122
+
1123
+ # Check if a entity with that name exists or not. If not, raise error.
1124
+ if df.shape[0] == 0:
1125
+ msg_code = MessageCodes.FUNC_EXECUTION_FAILED
1126
+ error_msg = Messages.get_message(
1127
+ msg_code, "get_data_source()", "DataSource with name '{}' does not exist.".format(name))
1128
+ raise TeradataMlException(error_msg, msg_code)
1129
+
1130
+ return DataSource._from_df(df)
1131
+
1132
+ def set_features_inactive(self, names):
1133
+ """
1134
+ DESCRIPTION:
1135
+ Mark the feature status as 'inactive'. Note that, inactive features are
1136
+ not available for any further processing. Set the status as 'active' with
1137
+ "set_features_active()" method.
1138
+
1139
+ PARAMETERS:
1140
+ names:
1141
+ Required Argument.
1142
+ Specifies the name(s) of the feature(s).
1143
+ Types: str OR list of str
1144
+
1145
+ RETURNS:
1146
+ bool
1147
+
1148
+ RAISES:
1149
+ teradataMLException
1150
+
1151
+ EXAMPLES:
1152
+ >>> from teradataml import DataFrame, DataSource, FeatureStore, load_example_data
1153
+ # Load the admissions data to Vantage.
1154
+ >>> load_example_data("dataframe", "admissions_train")
1155
+ # Create DataFrame on admissions data.
1156
+ >>> df = DataFrame("admissions_train")
1157
+ >>> df
1158
+ masters gpa stats programming admitted
1159
+ id
1160
+ 34 yes 3.85 Advanced Beginner 0
1161
+ 32 yes 3.46 Advanced Beginner 0
1162
+ 11 no 3.13 Advanced Advanced 1
1163
+ 40 yes 3.95 Novice Beginner 0
1164
+ 38 yes 2.65 Advanced Beginner 1
1165
+ 36 no 3.00 Advanced Novice 0
1166
+ 7 yes 2.33 Novice Novice 1
1167
+ 26 yes 3.57 Advanced Advanced 1
1168
+ 19 yes 1.98 Advanced Advanced 0
1169
+ 13 no 4.00 Advanced Novice 1
1170
+ >>>
1171
+ # Create FeatureGroup from DataFrame df.
1172
+ >>> fg = FeatureGroup.from_DataFrame(name='admissions', df=df, entity_columns='id')
1173
+ # Apply the FeatureGroup to FeatureStore 'vfs_v1'.
1174
+ >>> fs = FeatureStore('vfs_v1')
1175
+ >>> fs.apply(fg)
1176
+ True
1177
+ # Get FeatureGroup 'admissions' from FeatureStore.
1178
+ >>> fg = fs.get_feature_group('admissions')
1179
+ >>> fg
1180
+ FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=programming), Feature(name=admitted), Feature(name=stats), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
1181
+
1182
+ # Set the Feature 'programming' inactive.
1183
+ >>> fs.set_features_inactive('programming')
1184
+ True
1185
+ # Get FeatureGroup again after setting feature inactive.
1186
+ >>> fg = fs.get_feature_group('admissions')
1187
+ >>> fg
1188
+ FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=stats), Feature(name=admitted), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
1189
+ >>>
1190
+ """
1191
+ return self.__set_active_inactive_features(names, active=False)
1192
+
1193
+ def set_features_active(self, names):
1194
+ """
1195
+ DESCRIPTION:
1196
+ Mark the feature status as active. Set the status as 'inactive' with
1197
+ "set_features_inactive()" method. Note that, inactive features are
1198
+ not available for any further processing.
1199
+
1200
+ PARAMETERS:
1201
+ names:
1202
+ Required Argument.
1203
+ Specifies the name(s) of the feature(s).
1204
+ Types: str OR list of str
1205
+
1206
+ RETURNS:
1207
+ bool
1208
+
1209
+ RAISES:
1210
+ teradataMLException
1211
+
1212
+ EXAMPLES:
1213
+ >>> from teradataml import DataFrame, DataSource, FeatureStore, load_example_data
1214
+ # Load the admissions data to Vantage.
1215
+ >>> load_example_data("dataframe", "admissions_train")
1216
+ # Create DataFrame on admissions data.
1217
+ >>> df = DataFrame("admissions_train")
1218
+ >>> df
1219
+ masters gpa stats programming admitted
1220
+ id
1221
+ 34 yes 3.85 Advanced Beginner 0
1222
+ 32 yes 3.46 Advanced Beginner 0
1223
+ 11 no 3.13 Advanced Advanced 1
1224
+ 40 yes 3.95 Novice Beginner 0
1225
+ 38 yes 2.65 Advanced Beginner 1
1226
+ 36 no 3.00 Advanced Novice 0
1227
+ 7 yes 2.33 Novice Novice 1
1228
+ 26 yes 3.57 Advanced Advanced 1
1229
+ 19 yes 1.98 Advanced Advanced 0
1230
+ 13 no 4.00 Advanced Novice 1
1231
+ >>>
1232
+ # Create FeatureGroup from DataFrame df.
1233
+ >>> fg = FeatureGroup.from_DataFrame(name='admissions', df=df, entity_columns='id')
1234
+ # Apply the FeatureGroup to FeatureStore 'vfs_v1'.
1235
+ >>> fs = FeatureStore('vfs_v1')
1236
+ >>> fs.apply(fg)
1237
+ True
1238
+ # Get FeatureGroup 'admissions' from FeatureStore.
1239
+ >>> fg = fs.get_feature_group('admissions')
1240
+ >>> fg
1241
+ FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=programming), Feature(name=admitted), Feature(name=stats), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
1242
+ # Set the Feature 'programming' inactive.
1243
+ >>> fs.set_features_inactive('programming')
1244
+ True
1245
+ # Get FeatureGroup again after setting feature inactive.
1246
+ >>> fg = fs.get_feature_group('admissions')
1247
+ >>> fg
1248
+ FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=stats), Feature(name=admitted), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
1249
+ >>>
1250
+
1251
+ # Mark Feature 'programming' from 'inactive' to 'active'.
1252
+ >>> fs.set_features_active('programming')
1253
+ # Get FeatureGroup again after setting feature active.
1254
+ >>> fg = fs.get_feature_group('admissions')
1255
+ >>> fg
1256
+ FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=programming), Feature(name=admitted), Feature(name=stats), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
1257
+ >>>
1258
+ """
1259
+ return self.__set_active_inactive_features(names, active=True)
1260
+
1261
+ def __set_active_inactive_features(self, names, active):
1262
+ """
1263
+ DESCRIPTION:
1264
+ Internal function to either active or inactive features.
1265
+
1266
+ PARAMETERS:
1267
+ names:
1268
+ Required Argument.
1269
+ Specifies the name the feature.
1270
+ Types: str OR list of str
1271
+
1272
+ RETURNS:
1273
+ bool
1274
+
1275
+ RAISES:
1276
+ teradataMLException
1277
+
1278
+ EXAMPLES:
1279
+ # Example 1: Archive the feature 'feature1' in the repo
1280
+ # 'vfs_v1'.
1281
+ >>> from teradataml import FeatureStore
1282
+ >>> fs = FeatureStore('vfs_v1')
1283
+ >>> fs.__archive_unarchive_features(name='feature1')
1284
+ True
1285
+ >>>
1286
+ """
1287
+ names = UtilFuncs._as_list(names)
1288
+
1289
+ argument_validation_params = []
1290
+ argument_validation_params.append(["names", names, False, (str, list), True])
1291
+
1292
+ # Validate argument types
1293
+ _Validators._validate_function_arguments(argument_validation_params)
1294
+
1295
+ status = FeatureStatus.ACTIVE.name if active else FeatureStatus.INACTIVE.name
1296
+
1297
+ _update_data(table_name=EFS_FEATURES_SPEC["table_name"],
1298
+ schema_name=self.__repo,
1299
+ update_columns_values={"status": status},
1300
+ update_conditions={"name": names}
1301
+ )
1302
+ return True
1303
+
1304
+ def apply(self, object):
1305
+ """
1306
+ DESCRIPTION:
1307
+ Register objects to repository.
1308
+
1309
+ PARAMETERS:
1310
+ object:
1311
+ Required Argument.
1312
+ Specifies the object to update the repository.
1313
+ Types: Feature OR DataSource OR Entity OR FeatureGroup.
1314
+
1315
+ RETURNS:
1316
+ bool.
1317
+
1318
+ RAISES:
1319
+ TeradataMLException
1320
+
1321
+ EXAMPLES:
1322
+ >>> load_example_data('dataframe', ['sales'])
1323
+ >>> df = DataFrame("sales")
1324
+
1325
+ # Example 1: create a Feature for column 'Feb' from 'sales' DataFrame
1326
+ # and register with repo 'vfs_v1'.
1327
+ >>> # Create Feature.
1328
+ >>> from teradataml import Feature
1329
+ >>> feature = Feature('sales:Feb', df.Feb)
1330
+ >>> # Register the above Feature with repo.
1331
+ >>> fs = FeatureStore('vfs_v1')
1332
+ >>> fs.apply(feature)
1333
+ True
1334
+ >>>
1335
+
1336
+ # Example 2: create Entity for 'sales' DataFrame and register
1337
+ # with repo 'vfs_v1'.
1338
+ >>> # Create Entity.
1339
+ >>> from teradataml import Entity
1340
+ >>> entity = Entity('sales:accounts', df.accounts)
1341
+ >>> # Register the above Entity with repo.
1342
+ >>> fs = FeatureStore('vfs_v1')
1343
+ >>> fs.apply(entity)
1344
+ True
1345
+ >>>
1346
+
1347
+ # Example 3: create DataSource for 'sales' DataFrame and register
1348
+ # with repo 'vfs_v1'.
1349
+ >>> # Create DataSource.
1350
+ >>> from teradataml import DataSource
1351
+ >>> ds = DataSource('Sales_Data', df)
1352
+ >>> # Register the above DataSource with repo.
1353
+ >>> fs = FeatureStore('vfs_v1')
1354
+ >>> fs.apply(ds)
1355
+ True
1356
+ >>>
1357
+
1358
+ # Example 4: create FeatureStore with all the objects
1359
+ # created in above examples and register with
1360
+ # repo 'vfs_v1'.
1361
+ >>> # Create FeatureGroup.
1362
+ >>> from teradataml import FeatureGroup
1363
+ >>> fg = FeatureGroup('Sales',
1364
+ ... features=feature,
1365
+ ... entity=entity,
1366
+ ... data_source=data_source)
1367
+ >>> # Register the above FeatureStore with repo.
1368
+ >>> fs = FeatureStore('vfs_v1')
1369
+ >>> fs.apply(fg)
1370
+ True
1371
+ >>>
1372
+ """
1373
+ argument_validation_params = []
1374
+ argument_validation_params.append(["name", object, False, (Feature, Entity, DataSource, FeatureGroup)])
1375
+
1376
+ # Validate argument types
1377
+ _Validators._validate_function_arguments(argument_validation_params)
1378
+ return object.publish(self.__repo)
1379
+
1380
+ def get_dataset(self, group_name):
1381
+ """
1382
+ DESCRIPTION:
1383
+ Returns teradataml DataFrame based on "group_name".
1384
+
1385
+ PARAMETERS:
1386
+ group_name:
1387
+ Required Argument.
1388
+ Specifies the name of the feature group.
1389
+ Types: str
1390
+
1391
+ RETURNS:
1392
+ teradataml DataFrame.
1393
+
1394
+ RAISES:
1395
+ TeradataMLException
1396
+
1397
+ EXAMPLES:
1398
+ >>> from teradataml import DataFrame, FeatureStore, load_example_data
1399
+ # Load the sales data to Vantage.
1400
+ >>> load_example_data("dataframe", "sales")
1401
+ # Create DataFrame on sales data.
1402
+ >>> df = DataFrame("sales")
1403
+ >>> df
1404
+ >>> df
1405
+ Feb Jan Mar Apr datetime
1406
+ accounts
1407
+ Orange Inc 210.0 NaN NaN 250.0 04/01/2017
1408
+ Jones LLC 200.0 150.0 140.0 180.0 04/01/2017
1409
+ Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
1410
+ Alpha Co 210.0 200.0 215.0 250.0 04/01/2017
1411
+ Yellow Inc 90.0 NaN NaN NaN 04/01/2017
1412
+ >>>
1413
+ # Create FeatureGroup with name 'sales' from DataFrame.
1414
+ >>> fg = FeatureGroup.from_DataFrame(
1415
+ ... name="sales", df=df, entity_columns="accounts", timestamp_col_name="datetime")
1416
+ # Apply the FeatureGroup to FeatureStore.
1417
+ >>> fs = FeatureStore("vfs_v1")
1418
+ >>> fs.apply(fg)
1419
+ True
1420
+
1421
+ # Get the DataSet for FeatureGroup 'sales'
1422
+ >>> df = fs.get_dataset('sales')
1423
+ >>> df
1424
+ datetime Jan Feb Apr Mar
1425
+ accounts
1426
+ Orange Inc 04/01/2017 NaN 210.0 250.0 NaN
1427
+ Jones LLC 04/01/2017 150.0 200.0 180.0 140.0
1428
+ Blue Inc 04/01/2017 50.0 90.0 101.0 95.0
1429
+ Alpha Co 04/01/2017 200.0 210.0 250.0 215.0
1430
+ Yellow Inc 04/01/2017 NaN 90.0 NaN NaN
1431
+ >>>
1432
+ """
1433
+ # Get the FeatureGroup first and extract all details.
1434
+ feature_group = self.get_feature_group(group_name)
1435
+ columns = [feature.column_name for feature in feature_group.features
1436
+ if feature.status != FeatureStatus.INACTIVE]
1437
+ entity_columns = feature_group.entity.columns
1438
+ source = feature_group.data_source.source
1439
+
1440
+ # Create DF from the source.
1441
+ df = DataFrame.from_query(source)
1442
+
1443
+ # Select the corresponding columns.
1444
+ required_columns = entity_columns + columns
1445
+ if feature_group.data_source.timestamp_col_name:
1446
+ columns = [col for col in columns if col != feature_group.data_source.timestamp_col_name]
1447
+ required_columns = entity_columns + [feature_group.data_source.timestamp_col_name] + columns
1448
+ return df.select(required_columns)
1449
+
1450
+ def __get_feature_group_names(self, name, type_):
1451
+ """
1452
+ DESCRIPTION:
1453
+ Internal function to get the associated group names for
1454
+ Feature or DataSource OR Entity.
1455
+
1456
+ PARAMETERS:
1457
+ name:
1458
+ Required Argument.
1459
+ Specifies the name of the Feature or DataSource or Entity.
1460
+ Types: str
1461
+
1462
+ type_:
1463
+ Required Argument.
1464
+ Specifies the type of the objects stored in feature store.
1465
+ Permitted Values:
1466
+ * feature
1467
+ * data_source
1468
+ * entity
1469
+ Types: str
1470
+
1471
+ RETURNS:
1472
+ list
1473
+
1474
+ RAISES:
1475
+ None
1476
+
1477
+ EXAMPLES:
1478
+ >>> self.__get_feature_group_names('admissions', 'data_source')
1479
+ """
1480
+ if type_ == "feature":
1481
+ df = self.__get_features_df()
1482
+ return [rec.group_name for rec in df[df.name == name].itertuples() if rec.group_name is not None]
1483
+ elif type_ == "data_source":
1484
+ df = self.__get_feature_group_df()
1485
+ return [rec.name for rec in df[df.data_source_name == name].itertuples()]
1486
+ elif type_ == "entity":
1487
+ df = self.__get_feature_group_df()
1488
+ return [rec.name for rec in df[df.entity_name == name].itertuples()]
1489
+
1490
+ def __remove_obj(self, name, type_, action="archive"):
1491
+ """
1492
+ DESCRIPTION:
1493
+ Internal function to get the remove Feature or DataSource OR
1494
+ Entity from repo.
1495
+
1496
+ PARAMETERS:
1497
+ name:
1498
+ Required Argument.
1499
+ Specifies the name of the Feature or DataSource or Entity.
1500
+ Types: str
1501
+
1502
+ type_:
1503
+ Required Argument.
1504
+ Specifies the type of "name".
1505
+ Types: str
1506
+ Permitted Values:
1507
+ * feature
1508
+ * data_source
1509
+ * entity
1510
+
1511
+ action:
1512
+ Optional Argument.
1513
+ Specifies whether to remove from staging tables or not.
1514
+ When set to True, object is removed from staging tables.
1515
+ Otherwise, object is fetched from regular tables.
1516
+ Default Value: True
1517
+ Types: bool
1518
+
1519
+ RETURNS:
1520
+ bool
1521
+
1522
+ RAISES:
1523
+ None
1524
+
1525
+ EXAMPLES:
1526
+ >>> self.__remove_obj('admissions', 'data_source')
1527
+ """
1528
+ _vars = {
1529
+ "data_source": {"class": DataSource, "error_msg": "Update these FeatureGroups with other DataSources"},
1530
+ "entity": {"class": Entity, "error_msg": "Update these FeatureGroups with other Entities"},
1531
+ "feature": {"class": Feature, "error_msg": "Remove the Feature from FeatureGroup"},
1532
+ }
1533
+ c_name_ = _vars[type_]["class"].__name__
1534
+ argument_validation_params = []
1535
+ argument_validation_params.append([type_, name, False, (str, _vars[type_]["class"]), True])
1536
+
1537
+ # Validate argument types
1538
+ _Validators._validate_function_arguments(argument_validation_params)
1539
+ # Extract the name if argument is class type.
1540
+ if isinstance(name, _vars[type_]["class"]):
1541
+ name = name.name
1542
+
1543
+ # Before removing it, check if it is associated with any FeatureGroup.
1544
+ # If yes, raise error. Applicable only for Archive.
1545
+ if action == "archive":
1546
+ feature_groups = self.__get_feature_group_names(name, type_)
1547
+ if feature_groups:
1548
+ feature_groups = ", ".join(("'{}'".format(fg) for fg in feature_groups))
1549
+ message = ("{} '{}' is associated with FeatureGroups {}. {} and try deleting again.".format(
1550
+ c_name_, name, feature_groups, _vars[type_]["error_msg"]))
1551
+ raise TeradataMlException(Messages.get_message(
1552
+ MessageCodes.FUNC_EXECUTION_FAILED, '{}_{}'.format(action, type_), message),
1553
+ MessageCodes.FUNC_EXECUTION_FAILED)
1554
+
1555
+ if type_ == "entity":
1556
+ res = self._remove_entity(name, action)
1557
+ else:
1558
+ table_name = self.__table_names[type_]
1559
+ if action == "delete":
1560
+ table_name = self.__table_names["{}_staging".format(type_)]
1561
+
1562
+ res = _delete_data(table_name=table_name,
1563
+ delete_conditions=(Col("name") == name)
1564
+ )
1565
+
1566
+ if res == 1:
1567
+ print("{} '{}' is {}d.".format(c_name_, name, action))
1568
+ return True
1569
+ else:
1570
+ print("{} '{}' does not exist to {}.".format(c_name_, name, action))
1571
+ return False
1572
+
1573
+ @db_transaction
1574
+ def _remove_entity(self, name, action):
1575
+ """
1576
+ DESCRIPTION:
1577
+ Internal function to get the remove Entity from repo.
1578
+
1579
+ PARAMETERS:
1580
+ name:
1581
+ Required Argument.
1582
+ Specifies the name of the Entity.
1583
+ Types: str
1584
+
1585
+ action:
1586
+ Required Argument.
1587
+ Specifies whether to remove from staging tables or not.
1588
+ When set to "delete", Entity is removed from staging tables.
1589
+ Otherwise, Entity is removed from regular tables.
1590
+ Types: str
1591
+
1592
+ RETURNS:
1593
+ bool
1594
+
1595
+ RAISES:
1596
+ None
1597
+
1598
+ EXAMPLES:
1599
+ >>> self._remove_entity('admissions', 'delete')
1600
+ """
1601
+ ent_table = self.__table_names["entity"]
1602
+ ent_table_xref = self.__table_names["entity_xref"]
1603
+ if action == "delete":
1604
+ ent_table = self.__table_names["entity_staging"]
1605
+ ent_table_xref = self.__table_names["entity_staging_xref"]
1606
+
1607
+ # remove it from xref table first.
1608
+ _delete_data(table_name=ent_table_xref,
1609
+ delete_conditions=(Col("entity_name") == name)
1610
+ )
1611
+
1612
+ # remove from entity table.
1613
+ res = _delete_data(table_name=ent_table,
1614
+ delete_conditions=(Col("name") == name)
1615
+ )
1616
+
1617
+ return res
1618
+
1619
+ def archive_data_source(self, data_source):
1620
+ """
1621
+ DESCRIPTION:
1622
+ Archives DataSource from repository. Note that archived DataSource
1623
+ is not available for any further processing. Archived DataSource can be
1624
+ viewed using "list_archived_data_sources()" method.
1625
+
1626
+ PARAMETERS:
1627
+ data_source:
1628
+ Required Argument.
1629
+ Specifies either the name of DataSource or Object of DataSource
1630
+ to archive from repository.
1631
+ Types: str OR DataSource
1632
+
1633
+ RETURNS:
1634
+ bool
1635
+
1636
+ RAISES:
1637
+ TeradataMLException, TypeError, ValueError
1638
+
1639
+ EXAMPLES:
1640
+ >>> from teradataml import DataSource, FeatureStore, load_example_data
1641
+ # Create a DataSource using SELECT statement.
1642
+ >>> ds = DataSource(name="sales_data", source="select * from sales")
1643
+ # Create FeatureStore for repo 'vfs_v1'.
1644
+ >>> fs = FeatureStore("vfs_v1")
1645
+ # Apply DataSource to FeatureStore.
1646
+ >>> fs.apply(ds)
1647
+ True
1648
+ # List the available DataSources.
1649
+ >>> fs.list_data_sources()
1650
+ description timestamp_col_name source
1651
+ name
1652
+ sales_data None None select * from sales
1653
+
1654
+ # Archive DataSource with name "sales_data".
1655
+ >>> fs.archive_data_source("sales_data")
1656
+ DataSource 'sales_data' is archived.
1657
+ True
1658
+ >>>
1659
+ # List the available DataSources after archive.
1660
+ >>> fs.list_data_sources()
1661
+ Empty DataFrame
1662
+ Columns: [description, timestamp_col_name, source]
1663
+ Index: []
1664
+ """
1665
+ return self.__remove_obj(name=data_source, type_="data_source")
1666
+
1667
+ def delete_data_source(self, data_source):
1668
+ """
1669
+ DESCRIPTION:
1670
+ Removes the archived DataSource from repository.
1671
+
1672
+ PARAMETERS:
1673
+ data_source:
1674
+ Required Argument.
1675
+ Specifies either the name of DataSource or Object of DataSource
1676
+ to remove from repository.
1677
+ Types: str OR DataSource
1678
+
1679
+ RETURNS:
1680
+ bool.
1681
+
1682
+ RAISES:
1683
+ TeradataMLException, TypeError, ValueError
1684
+
1685
+ EXAMPLES:
1686
+ >>> from teradataml import DataFrame, DataSource, FeatureStore, load_example_data
1687
+ >>> load_example_data('dataframe', ['sales'])
1688
+ # Create teradataml DataFrame.
1689
+ >>> df = DataFrame("sales")
1690
+ # Create DataSource with source as teradataml DataFrame.
1691
+ >>> ds = DataSource(name="sales_data", source=df)
1692
+ # # Create FeatureStore for repo 'vfs_v1'.
1693
+ >>> fs = FeatureStore("vfs_v1")
1694
+ # Apply the DataSource to FeatureStore.
1695
+ >>> fs.apply(ds)
1696
+ True
1697
+ # Let's first archive the DataSource.
1698
+ >>> fs.archive_data_source("sales_data")
1699
+ DataSource 'sales_data' is archived.
1700
+ True
1701
+
1702
+ # Delete DataSource with name "sales_data".
1703
+ >>> fs.delete_data_source("sales_data")
1704
+ DataSource 'sales_data' is deleted.
1705
+ True
1706
+ >>>
1707
+ """
1708
+ return self.__remove_obj(name=data_source, type_="data_source", action="delete")
1709
+
1710
+ def archive_feature(self, feature):
1711
+ """
1712
+ DESCRIPTION:
1713
+ Archives Feature from repository. Note that archived Feature
1714
+ is not available for any further processing. Archived Feature can be
1715
+ viewed using "list_archived_features()" method.
1716
+
1717
+ PARAMETERS:
1718
+ feature:
1719
+ Required Argument.
1720
+ Specifies either the name of Feature or Object of Feature
1721
+ to archive from repository.
1722
+ Types: str OR Feature
1723
+
1724
+ RETURNS:
1725
+ bool
1726
+
1727
+ RAISES:
1728
+ TeradataMLException, TypeError, ValueError
1729
+
1730
+ EXAMPLES:
1731
+ >>> from teradataml import DataFrame, Feature, FeatureStore
1732
+ >>> load_example_data('dataframe', ['sales'])
1733
+ # Create teradataml DataFrame.
1734
+ >>> df = DataFrame("sales")
1735
+ # Create Feature for Column 'Feb'.
1736
+ >>> feature = Feature(name="sales_data_Feb", column=df.Feb)
1737
+ # Create FeatureStore for the repo 'staging_repo'.
1738
+ >>> fs = FeatureStore("staging_repo")
1739
+ # Apply the Feature to FeatureStore.
1740
+ >>> fs.apply(feature)
1741
+ True
1742
+ # List the available Features.
1743
+ >>> fs.list_features()
1744
+ column_name description creation_time modified_time tags data_type feature_type status group_name
1745
+ name
1746
+ sales_data_Feb Feb None 2024-10-03 18:21:03.720464 None None FLOAT CONTINUOUS ACTIVE None
1747
+
1748
+ # Archive Feature with name "sales_data_Feb".
1749
+ >>> fs.archive_feature(feature=feature)
1750
+ Feature 'sales_data_Feb' is archived.
1751
+ True
1752
+ # List the available Features after archive.
1753
+ >>> fs.list_features()
1754
+ Empty DataFrame
1755
+ Columns: [column_name, description, creation_time, modified_time, tags, data_type, feature_type, status, group_name]
1756
+ Index: []
1757
+ >>>
1758
+ """
1759
+ return self.__remove_obj(name=feature, type_="feature")
1760
+
1761
+ def delete_feature(self, feature):
1762
+ """
1763
+ DESCRIPTION:
1764
+ Removes the archived Feature from repository.
1765
+
1766
+ PARAMETERS:
1767
+ feature:
1768
+ Required Argument.
1769
+ Specifies either the name of Feature or Object of Feature
1770
+ to remove from repository.
1771
+ Types: str OR Feature
1772
+
1773
+ RETURNS:
1774
+ bool.
1775
+
1776
+ RAISES:
1777
+ TeradataMLException, TypeError, ValueError
1778
+
1779
+ EXAMPLES:
1780
+ >>> from teradataml import DataFrame, Feature, FeatureStore
1781
+ >>> load_example_data('dataframe', ['sales'])
1782
+ # Create teradataml DataFrame.
1783
+ >>> df = DataFrame("sales")
1784
+ # Create Feature for Column 'Feb'.
1785
+ >>> feature = Feature(name="sales_data_Feb", column=df.Feb)
1786
+ # Create a feature store with name "staging_repo".
1787
+ >>> fs = FeatureStore("staging_repo")
1788
+ # Add the feature created above in the feature store.
1789
+ >>> fs.apply(feature)
1790
+ True
1791
+ # Let's first archive the Feature.
1792
+ >>> fs.archive_feature(feature=feature)
1793
+ Feature 'sales_data_Feb' is archived.
1794
+ True
1795
+
1796
+ # Delete Feature with name "sales_data_Feb".
1797
+ >>> fs.delete_feature(feature=feature)
1798
+ Feature 'sales_data_Feb' is deleted.
1799
+ True
1800
+ >>>
1801
+ """
1802
+ return self.__remove_obj(name=feature, type_="feature", action="delete")
1803
+
1804
+ def archive_entity(self, entity):
1805
+ """
1806
+ DESCRIPTION:
1807
+ Archives Entity from repository. Note that archived Entity
1808
+ is not available for any further processing. Archived Entity can be
1809
+ viewed using "list_archived_entities()" method.
1810
+
1811
+ PARAMETERS:
1812
+ entity:
1813
+ Required Argument.
1814
+ Specifies either the name of Entity or Object of Entity
1815
+ to remove from repository.
1816
+ Types: str OR Entity
1817
+
1818
+ RETURNS:
1819
+ bool.
1820
+
1821
+ RAISES:
1822
+ TeradataMLException, TypeError, ValueError
1823
+
1824
+ EXAMPLES:
1825
+ >>> from teradataml import DataFrame, Entity, FeatureStore
1826
+ >>> load_example_data('dataframe', ['sales'])
1827
+ # Create teradataml DataFrame.
1828
+ >>> df = DataFrame("sales")
1829
+ # Create Entity using teradataml DataFrame Column.
1830
+ >>> entity = Entity(name="sales_data", columns=df.accounts)
1831
+ # Create FeatureStore for repo 'staging_repo'.
1832
+ >>> fs = FeatureStore("staging_repo")
1833
+ # Apply the entity to FeatureStore.
1834
+ >>> fs.apply(entity)
1835
+ True
1836
+ # List all the available entities.
1837
+ >>> fs.list_entities()
1838
+ description
1839
+ name entity_column
1840
+ sales_data accounts None
1841
+
1842
+ # Archive Entity with name "sales_data".
1843
+ >>> fs.archive_entity(entity=entity.name)
1844
+ Entity 'sales_data' is archived.
1845
+ True
1846
+ # List the entities after archive.
1847
+ >>> fs.list_entities()
1848
+ Empty DataFrame
1849
+ Columns: [description]
1850
+ Index: []
1851
+ """
1852
+ return self.__remove_obj(name=entity, type_="entity")
1853
+
1854
+ def delete_entity(self, entity):
1855
+ """
1856
+ DESCRIPTION:
1857
+ Removes archived Entity from repository.
1858
+
1859
+ PARAMETERS:
1860
+ entity:
1861
+ Required Argument.
1862
+ Specifies either the name of Entity or Object of Entity
1863
+ to delete from repository.
1864
+ Types: str OR Entity
1865
+
1866
+ RETURNS:
1867
+ bool.
1868
+
1869
+ RAISES:
1870
+ TeradataMLException, TypeError, ValueError
1871
+
1872
+ EXAMPLES:
1873
+ >>> from teradataml import DataFrame, Entity, FeatureStore
1874
+ >>> load_example_data('dataframe', ['sales'])
1875
+ # Create teradataml DataFrame.
1876
+ >>> df = DataFrame("sales")
1877
+ # Create Entity using teradataml DataFrame Column.
1878
+ >>> entity = Entity(name="sales_data", columns=df.accounts)
1879
+ # Create FeatureStore for repo 'staging_repo'.
1880
+ >>> fs = FeatureStore("staging_repo")
1881
+ # Apply the entity to FeatureStore.
1882
+ >>> fs.apply(entity)
1883
+ True
1884
+ # Let's first archive the entity.
1885
+ >>> fs.archive_entity(entity=entity.name)
1886
+ Entity 'sales_data' is archived.
1887
+ True
1888
+
1889
+ # Delete Entity with name "sales_data".
1890
+ >>> fs.delete_entity(entity=entity.name)
1891
+ Entity 'sales_data' is deleted.
1892
+ True
1893
+ >>>
1894
+ """
1895
+ return self.__remove_obj(name=entity, type_="entity", action="delete")
1896
+
1897
+ def __get_features_where_clause(self, features):
1898
+ """
1899
+ Internal function to prepare a where clause on features df.
1900
+ """
1901
+ col_expr = Col("name") == features[0]
1902
+ for feature in features[1:]:
1903
+ col_expr = ((col_expr) | (Col("name") == feature))
1904
+
1905
+ return col_expr
1906
+
1907
+ def archive_feature_group(self, feature_group):
1908
+ """
1909
+ DESCRIPTION:
1910
+ Archives FeatureGroup from repository. Note that archived FeatureGroup
1911
+ is not available for any further processing. Archived FeatureGroup can be
1912
+ viewed using "list_archived_feature_groups()" method.
1913
+ Note:
1914
+ The function archives the associated Features, Entity and DataSource
1915
+ if they are not associated with any other FeatureGroups.
1916
+
1917
+ PARAMETERS:
1918
+ feature_group:
1919
+ Required Argument.
1920
+ Specifies either the name of FeatureGroup or Object of FeatureGroup
1921
+ to archive from repository.
1922
+ Types: str OR FeatureGroup
1923
+
1924
+ RETURNS:
1925
+ bool.
1926
+
1927
+ RAISES:
1928
+ TeradataMLException, TypeError, ValueError
1929
+
1930
+ EXAMPLES:
1931
+ >>> from teradataml import DataFrame, FeatureGroup, FeatureStore
1932
+ >>> load_example_data('dataframe', ['sales'])
1933
+ # Create teradataml DataFrame.
1934
+ >>> df = DataFrame("sales")
1935
+ # Create FeatureGroup from teradataml DataFrame.
1936
+ >>> fg = FeatureGroup.from_DataFrame(name="sales", entity_columns="accounts", df=df, timestamp_col_name="datetime")
1937
+ # Create FeatureStore for the repo 'staging_repo'.
1938
+ >>> fs = FeatureStore("staging_repo")
1939
+ # Apply FeatureGroup to FeatureStore.
1940
+ >>> fs.apply(fg)
1941
+ True
1942
+ # List all the available FeatureGroups.
1943
+ >>> fs.list_feature_groups()
1944
+ description data_source_name entity_name
1945
+ name
1946
+ sales None sales sales
1947
+
1948
+ # Archive FeatureGroup with name "sales".
1949
+ >>> fs.archive_feature_group(feature_group='sales')
1950
+ FeatureGroup 'sales' is archived.
1951
+ True
1952
+ >>>
1953
+ # List all the available FeatureGroups after archive.
1954
+ >>> fs.list_feature_groups()
1955
+ Empty DataFrame
1956
+ Columns: [description, data_source_name, entity_name]
1957
+ Index: []
1958
+ """
1959
+ argument_validation_params = []
1960
+ argument_validation_params.append(["feature_group", feature_group, False, (str, FeatureGroup), True])
1961
+
1962
+ # Validate argument types
1963
+ _Validators._validate_function_arguments(argument_validation_params)
1964
+
1965
+ feature_group_name = feature_group if isinstance(feature_group, str) else feature_group.name
1966
+
1967
+ fg = self.get_feature_group(feature_group_name) if isinstance(feature_group, str) else feature_group
1968
+
1969
+ fg_df = self.list_feature_groups()
1970
+
1971
+ # Find out shared Features. Extract the features which are mapped to
1972
+ # other groups. They can not be deleted.
1973
+ feature_names = [f.name for f in fg.features]
1974
+ features_df = self.list_features()
1975
+ col_expr = self.__get_features_where_clause(feature_names)
1976
+ features_df = features_df[((features_df.group_name != fg.name) & (col_expr))]
1977
+ shared_features = [f.name for f in features_df.drop_duplicate('name').itertuples()]
1978
+ feature_names_to_remove = [f for f in feature_names if f not in shared_features]
1979
+
1980
+ # Find out shared Entities. If entity is not shared, then update 'entity_name'
1981
+ # to update value.
1982
+ entity_name = None
1983
+ ent = fg_df[((fg_df.entity_name == fg.entity.name) & (fg_df.name != fg.name))]
1984
+ recs = ent.shape[0]
1985
+ if recs == 0:
1986
+ entity_name = fg.entity.name
1987
+
1988
+ # Find out shared DataSources. If datasource is not shared, then update 'data_source_name'.
1989
+ data_source_name = None
1990
+ ds_df = fg_df[((fg_df.data_source_name == fg.data_source.name) & (fg_df.name != fg.name))]
1991
+ recs = ds_df.shape[0]
1992
+ if recs == 0:
1993
+ data_source_name = fg.data_source.name
1994
+
1995
+ res = self._archive_feature_group(fg.name, feature_names_to_remove, entity_name, data_source_name)
1996
+
1997
+ if res == 1:
1998
+ print("FeatureGroup '{}' is archived.".format(feature_group_name))
1999
+ return True
2000
+
2001
+ print("FeatureGroup '{}' not exist to archive.".format(feature_group_name))
2002
+ return False
2003
+
2004
+ @db_transaction
2005
+ def _archive_feature_group(self, group_name, feature_names, entity_name, data_source_name):
2006
+ """
2007
+ DESCRIPTION:
2008
+ Internal method to archive FeatureGroup from repository.
2009
+
2010
+ PARAMETERS:
2011
+ group_name:
2012
+ Required Argument.
2013
+ Specifies the name of FeatureGroup to archive from repository.
2014
+ Types: str
2015
+
2016
+ feature_names:
2017
+ Required Argument.
2018
+ Specifies the name of Features to archive from repository.
2019
+ Types: list
2020
+
2021
+ entity_name:
2022
+ Required Argument.
2023
+ Specifies the name of Entity to archive from repository.
2024
+ Types: str
2025
+
2026
+ data_source_name:
2027
+ Required Argument.
2028
+ Specifies the name of DataSource to archive from repository.
2029
+ Types: str
2030
+
2031
+ RETURNS:
2032
+ bool.
2033
+
2034
+ RAISES:
2035
+ OperationalError
2036
+
2037
+ EXAMPLES:
2038
+ >>> self._archive_feature_group("group1", ["feature1"], "entity_name", None)
2039
+ """
2040
+ # Remove data for FeatureGroup from Xref table.
2041
+ # This allows to remove data from other tables.
2042
+ res = _delete_data(schema_name=self.__repo,
2043
+ table_name=EFS_GROUP_FEATURES_SPEC["table_name"],
2044
+ delete_conditions=(Col("group_name") == group_name)
2045
+ )
2046
+
2047
+ # Remove FeatureGroup.
2048
+ res = _delete_data(schema_name=self.__repo,
2049
+ table_name=EFS_FEATURE_GROUP_SPEC["table_name"],
2050
+ delete_conditions=(Col("name") == group_name)
2051
+ )
2052
+
2053
+ # Remove Features.
2054
+ if feature_names:
2055
+ _delete_data(schema_name=self.__repo,
2056
+ table_name=EFS_FEATURES_SPEC["table_name"],
2057
+ delete_conditions=self.__get_features_where_clause(feature_names)
2058
+ )
2059
+
2060
+ # Remove entities.
2061
+ if entity_name:
2062
+ _delete_data(schema_name=self.__repo,
2063
+ table_name=EFS_ENTITY_XREF_SPEC["table_name"],
2064
+ delete_conditions=(Col("entity_name") == entity_name)
2065
+ )
2066
+
2067
+ _delete_data(schema_name=self.__repo,
2068
+ table_name=EFS_ENTITY_SPEC["table_name"],
2069
+ delete_conditions=(Col("name") == entity_name)
2070
+ )
2071
+
2072
+ # Remove DataSource.
2073
+ if data_source_name:
2074
+ _delete_data(schema_name=self.__repo,
2075
+ table_name=EFS_DATA_SOURCE_SPEC["table_name"],
2076
+ delete_conditions=(Col("name") == data_source_name),
2077
+ )
2078
+
2079
+ return res
2080
+
2081
+ @db_transaction
2082
+ def delete_feature_group(self, feature_group):
2083
+ """
2084
+ DESCRIPTION:
2085
+ Removes archived FeatureGroup from repository.
2086
+ Note:
2087
+ Unlike 'archive_feature_group()', this function does not delete the
2088
+ associated Features, Entity and DataSource. One should delete those
2089
+ using 'delete_feature()', 'delete_entity()' and 'delete_data_source()'.
2090
+
2091
+ PARAMETERS:
2092
+ feature_group:
2093
+ Required Argument.
2094
+ Specifies either the name of FeatureGroup or Object of FeatureGroup
2095
+ to delete from repository.
2096
+ Types: str OR FeatureGroup
2097
+
2098
+ RETURNS:
2099
+ bool
2100
+
2101
+ RAISES:
2102
+ TeradataMLException, TypeError, ValueError
2103
+
2104
+ EXAMPLES:
2105
+ >>> from teradataml import DataFrame, FeatureGroup, FeatureStore
2106
+ >>> load_example_data('dataframe', ['sales'])
2107
+ # Create teradataml DataFrame.
2108
+ >>> df = DataFrame("sales")
2109
+ # Create FeatureGroup from teradataml DataFrame.
2110
+ >>> fg = FeatureGroup.from_DataFrame(name="sales", entity_columns="accounts", df=df, timestamp_col_name="datetime")
2111
+ # Create FeatureStore for the repo 'staging_repo'.
2112
+ >>> fs = FeatureStore("staging_repo")
2113
+ # Apply FeatureGroup to FeatureStore.
2114
+ >>> fs.apply(fg)
2115
+ True
2116
+ # Let's first archive FeatureGroup with name "sales".
2117
+ >>> fs.archive_feature_group(feature_group='sales')
2118
+ FeatureGroup 'sales' is archived.
2119
+ True
2120
+
2121
+ # Delete FeatureGroup with name "sales".
2122
+ >>> fs.delete_feature_group(feature_group='sales')
2123
+ FeatureGroup 'sales' is deleted.
2124
+ True
2125
+ >>>
2126
+ """
2127
+ argument_validation_params = []
2128
+ argument_validation_params.append(["feature_group", feature_group, False, (str, FeatureGroup), True])
2129
+
2130
+ # Validate argument types
2131
+ _Validators._validate_function_arguments(argument_validation_params)
2132
+
2133
+ fg_name = feature_group if isinstance(feature_group, str) else feature_group.name
2134
+
2135
+ # Remove data for FeatureGroup.
2136
+ _delete_data(table_name=self.__table_names["group_features_staging"],
2137
+ delete_conditions=(Col("group_name") == fg_name)
2138
+ )
2139
+
2140
+ res = _delete_data(table_name=self.__table_names["feature_group_staging"],
2141
+ delete_conditions=(Col("name") == fg_name)
2142
+ )
2143
+
2144
+ if res == 1:
2145
+ print("FeatureGroup '{}' is deleted.".format(fg_name))
2146
+ return True
2147
+
2148
+ print("FeatureGroup '{}' not exist to delete.".format(fg_name))
2149
+ return False
2150
+
2151
+ def __get_obj_df(self, obj_type):
2152
+ """
2153
+ DESCRIPTION:
2154
+ Internal method to return either Features DataFrame OR Entity DataFrame
2155
+ OR DataSource DataFrame OR FeatureGroup DataFrame.
2156
+
2157
+ PARAMETERS:
2158
+ obj_type
2159
+ Required Argument.
2160
+ Specifies the type of DataFrame to return.
2161
+ Allowed Values:
2162
+ * feature
2163
+ * feature_group
2164
+ * entity
2165
+ * data_source
2166
+ * group_features
2167
+
2168
+ RETURNS:
2169
+ teradataml DataFrame.
2170
+
2171
+ RAISES:
2172
+ None
2173
+
2174
+ EXAMPLES:
2175
+ fs.__get_features_df()
2176
+ """
2177
+ if obj_type not in self.__df_container:
2178
+
2179
+ # For feature or feature_staging, join it with xref table
2180
+ # so group name appears while listing features.
2181
+ map_ = {"feature": "group_features", "feature_staging": "group_features_staging"}
2182
+ if obj_type in map_:
2183
+ features = DataFrame(self.__table_names[obj_type])
2184
+ features_xref = DataFrame(self.__table_names[map_[obj_type]]).select(
2185
+ ["feature_name", "group_name"])
2186
+ df = features.join(features_xref, on="name==feature_name", how='left')
2187
+ self.__df_container[obj_type] = df.select(features.columns+["group_name"])
2188
+ # For entity, join with xref table.
2189
+ elif obj_type == "entity" or obj_type == "entity_staging":
2190
+ ent_df = DataFrame(self.__table_names[obj_type])
2191
+ xref_df = DataFrame(self.__table_names["{}_xref".format(obj_type)]).select(
2192
+ ['entity_name', 'entity_column'])
2193
+ df = ent_df.join(xref_df, on="name==entity_name", how="inner")
2194
+ self.__df_container[obj_type] = df.select(ent_df.columns+["entity_column"])
2195
+ else:
2196
+ self.__df_container[obj_type] = DataFrame(self.__table_names[obj_type])
2197
+
2198
+ return self.__df_container[obj_type]
2199
+
2200
+ def version(self):
2201
+ """
2202
+ DESCRIPTION:
2203
+ Get the FeatureStore version.
2204
+
2205
+ PARAMETERS:
2206
+ None
2207
+
2208
+ RETURNS:
2209
+ str
2210
+
2211
+ RAISES:
2212
+ None
2213
+
2214
+ EXAMPLES:
2215
+ # Example 1: Get the version of FeatureStore version for
2216
+ # the repo 'vfs_v1'.
2217
+ >>> from teradataml import FeatureStore
2218
+ >>> fs = FeatureStore('vfs_v1')
2219
+ >>> fs.version()
2220
+ '1.0.0'
2221
+ >>>
2222
+ """
2223
+ return self.__version