teradataml 20.0.0.6__py3-none-any.whl → 20.0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of teradataml might be problematic.

Files changed (96)
  1. teradataml/README.md +210 -0
  2. teradataml/__init__.py +1 -1
  3. teradataml/_version.py +1 -1
  4. teradataml/analytics/analytic_function_executor.py +162 -76
  5. teradataml/analytics/byom/__init__.py +1 -1
  6. teradataml/analytics/json_parser/__init__.py +2 -0
  7. teradataml/analytics/json_parser/analytic_functions_argument.py +95 -2
  8. teradataml/analytics/json_parser/metadata.py +22 -4
  9. teradataml/analytics/sqle/DecisionTreePredict.py +3 -2
  10. teradataml/analytics/sqle/NaiveBayesPredict.py +3 -2
  11. teradataml/analytics/sqle/__init__.py +3 -0
  12. teradataml/analytics/utils.py +4 -1
  13. teradataml/automl/__init__.py +2369 -464
  14. teradataml/automl/autodataprep/__init__.py +15 -0
  15. teradataml/automl/custom_json_utils.py +184 -112
  16. teradataml/automl/data_preparation.py +113 -58
  17. teradataml/automl/data_transformation.py +154 -53
  18. teradataml/automl/feature_engineering.py +113 -53
  19. teradataml/automl/feature_exploration.py +548 -25
  20. teradataml/automl/model_evaluation.py +260 -32
  21. teradataml/automl/model_training.py +399 -206
  22. teradataml/clients/auth_client.py +2 -2
  23. teradataml/common/aed_utils.py +11 -2
  24. teradataml/common/bulk_exposed_utils.py +4 -2
  25. teradataml/common/constants.py +62 -2
  26. teradataml/common/garbagecollector.py +50 -21
  27. teradataml/common/messagecodes.py +47 -2
  28. teradataml/common/messages.py +19 -1
  29. teradataml/common/sqlbundle.py +23 -6
  30. teradataml/common/utils.py +116 -10
  31. teradataml/context/aed_context.py +16 -10
  32. teradataml/data/Employee.csv +5 -0
  33. teradataml/data/Employee_Address.csv +4 -0
  34. teradataml/data/Employee_roles.csv +5 -0
  35. teradataml/data/JulesBelvezeDummyData.csv +100 -0
  36. teradataml/data/byom_example.json +5 -0
  37. teradataml/data/creditcard_data.csv +284618 -0
  38. teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
  39. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +1 -1
  40. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +1 -1
  41. teradataml/data/docs/sqle/docs_17_20/TextParser.py +1 -1
  42. teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
  43. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +3 -7
  44. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +3 -7
  45. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +3 -7
  46. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +3 -7
  47. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +3 -7
  48. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +3 -7
  49. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +3 -7
  50. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +3 -7
  51. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +3 -7
  52. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +3 -7
  53. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +3 -7
  54. teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
  55. teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
  56. teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
  57. teradataml/data/load_example_data.py +29 -11
  58. teradataml/data/payment_fraud_dataset.csv +10001 -0
  59. teradataml/data/teradataml_example.json +67 -0
  60. teradataml/dataframe/copy_to.py +714 -54
  61. teradataml/dataframe/dataframe.py +1153 -33
  62. teradataml/dataframe/dataframe_utils.py +8 -3
  63. teradataml/dataframe/functions.py +168 -1
  64. teradataml/dataframe/setop.py +4 -1
  65. teradataml/dataframe/sql.py +141 -9
  66. teradataml/dbutils/dbutils.py +470 -35
  67. teradataml/dbutils/filemgr.py +1 -1
  68. teradataml/hyperparameter_tuner/optimizer.py +456 -142
  69. teradataml/lib/aed_0_1.dll +0 -0
  70. teradataml/lib/libaed_0_1.dylib +0 -0
  71. teradataml/lib/libaed_0_1.so +0 -0
  72. teradataml/lib/libaed_0_1_aarch64.so +0 -0
  73. teradataml/scriptmgmt/UserEnv.py +234 -34
  74. teradataml/scriptmgmt/lls_utils.py +43 -17
  75. teradataml/sdk/_json_parser.py +1 -1
  76. teradataml/sdk/api_client.py +9 -6
  77. teradataml/sdk/modelops/_client.py +3 -0
  78. teradataml/series/series.py +12 -7
  79. teradataml/store/feature_store/constants.py +601 -234
  80. teradataml/store/feature_store/feature_store.py +2886 -616
  81. teradataml/store/feature_store/mind_map.py +639 -0
  82. teradataml/store/feature_store/models.py +5831 -214
  83. teradataml/store/feature_store/utils.py +390 -0
  84. teradataml/table_operators/table_operator_util.py +1 -1
  85. teradataml/table_operators/templates/dataframe_register.template +6 -2
  86. teradataml/table_operators/templates/dataframe_udf.template +6 -2
  87. teradataml/utils/docstring.py +527 -0
  88. teradataml/utils/dtypes.py +93 -0
  89. teradataml/utils/internal_buffer.py +2 -2
  90. teradataml/utils/utils.py +41 -2
  91. teradataml/utils/validators.py +694 -17
  92. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/METADATA +213 -2
  93. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/RECORD +96 -81
  94. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/WHEEL +0 -0
  95. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/top_level.txt +0 -0
  96. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/zip-safe +0 -0
@@ -7,28 +7,42 @@ Secondary Owner: adithya.avvaru@teradata.com
 
 This file implements the core framework that allows user to use Teradata Enterprise Feature Store.
 """
-
+import os.path
+import operator
+import random
+from functools import reduce
 from sqlalchemy import literal_column
-from teradataml.context.context import get_connection
-from teradataml.common.constants import SQLConstants
+from teradataml.context.context import get_connection, _get_current_databasename
+from teradataml.common.constants import SQLConstants, AccessQueries
 from teradataml.common.exceptions import TeradataMlException
 from teradataml.common.messages import Messages
 from teradataml.common.messagecodes import MessageCodes
 from teradataml.dataframe.sql import _SQLColumnExpression as Col
-from teradataml.dbutils.dbutils import _create_database, _create_table, db_drop_table, execute_sql, Grant, Revoke, _update_data, _delete_data, db_transaction
+from teradataml.dbutils.dbutils import _create_database, _create_table, db_drop_table, execute_sql, Grant, Revoke, \
+    _update_data, _delete_data, db_transaction, db_list_tables, _insert_data, \
+    _is_trigger_exist, db_drop_view, _get_quoted_object_name
 from teradataml.store.feature_store.constants import *
+from teradataml.store.feature_store.mind_map import _TD_FS_MindMap_Template
 from teradataml.store.feature_store.models import *
+from teradataml.store.feature_store.constants import _FeatureStoreDFContainer
 from teradataml.common.sqlbundle import SQLBundle
 from teradataml.utils.validators import _Validators
+from teradataml.store.feature_store.utils import _FSUtils
 
 
 class FeatureStore:
     """Class for FeatureStore."""
 
-    def __init__(self, repo):
+    def __init__(self,
+                 repo,
+                 data_domain=None,
+                 check=True):
         """
         DESCRIPTION:
             Method to create FeatureStore in teradataml.
+            Note:
+                * One should establish a connection to Vantage using create_context()
+                  before creating a FeatureStore object.
 
         PARAMETERS:
             repo:
@@ -36,6 +50,21 @@ class FeatureStore:
                 Specifies the repository name.
                 Types: str.
 
+            data_domain:
+                Optional Argument.
+                Specifies the data domain to which FeatureStore points to.
+                Note:
+                    * If not specified, then default database name is considered as data domain.
+                Types: str
+
+            check:
+                Optional Argument.
+                Specifies whether to check the existence of the Feature store DB objects or not.
+                When set to True, the method checks for the existence of Feature store DB objects.
+                Otherwise, the method does not verify the existence of Feature store DB objects.
+                Default Value: True
+                Types: bool
+
         RETURNS:
             Object of FeatureStore.
 
@@ -43,18 +72,28 @@
             None
 
         EXAMPLES:
-            >>> # Create FeatureStore for repository 'vfs_v1'.
+            # Example 1: Create an instance of FeatureStore for repository 'vfs_v1'.
             >>> from teradataml import FeatureStore
-            >>> fs = FeatureStore('vfs_v1')
+            >>> fs = FeatureStore(repo='vfs_v1')
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+
+            >>> fs.setup()
+            True
+
             >>> fs
-            FeatureStore(vfs_v1)-v1.0
-            >>>
+            VantageFeatureStore(abc)-v2.0.0
         """
         argument_validation_params = []
         argument_validation_params.append(["repo", repo, False, (str), True])
 
         # Validate argument types
         _Validators._validate_function_arguments(argument_validation_params)
+
+        connection = get_connection()
+        if connection is None:
+            raise TeradataMlException(Messages.get_message(MessageCodes.CONNECTION_FAILURE),
+                                      MessageCodes.CONNECTION_FAILURE)
+
         # Do not validate the existance of repo as it consumes a network call.
         self.__repo = repo
         self.__version = ""
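
The constructor change above is twofold: __init__ now fails fast with CONNECTION_FAILURE when no context exists, and it gains data_domain/check arguments. A minimal usage sketch — the connection parameters below are placeholders, not taken from this diff:

    >>> from teradataml import create_context, FeatureStore
    >>> create_context(host="<host>", username="<user>", password="<password>")   # placeholder credentials
    >>> fs = FeatureStore(repo="vfs_v1", data_domain="sales", check=False)         # check=False skips the DB-object scan
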
@@ -66,24 +105,181 @@ class FeatureStore:
         self.__df_container = {}
 
         # Store the table names here. Then use this where ever required.
-        self.__table_names = EFS_TABLES
-
-        # Declare getter's for getting the corresponding DataFrame's.
-        self.__get_features_df = lambda : self.__get_obj_df("feature")
-        self.__get_archived_features_df = lambda : self.__get_obj_df("feature_staging")
-        self.__get_group_features_df = lambda : self.__get_obj_df("group_features")
-        self.__get_archived_group_features_df = lambda : self.__get_obj_df("group_features_staging")
-        self.__get_feature_group_df = lambda : self.__get_obj_df("feature_group")
-        self.__get_archived_feature_group_df = lambda : self.__get_obj_df("feature_group_staging")
-        self.__get_entity_df = lambda : self.__get_obj_df("entity")
-        self.__get_archived_entity_df = lambda : self.__get_obj_df("entity_staging")
-        self.__get_data_source_df = lambda : self.__get_obj_df("data_source")
-        self.__get_archived_data_source_df = lambda : self.__get_obj_df("data_source_staging")
+        self.__table_names = EFS_DB_COMPONENTS
+
+        # Declare getter's for getting the corresponding DataFrame's using _FeatureStoreDFContainer directly.
+        # Only keep the lambda functions that are actually used in the codebase
+        self.__get_features_df = lambda : _FeatureStoreDFContainer.get_df("feature", self.__repo, self.__data_domain)
+        self.__get_features_wog_df = lambda : _FeatureStoreDFContainer.get_df("feature_wog", self.__repo, self.__data_domain)
+        self.__get_archived_features_df = lambda : _FeatureStoreDFContainer.get_df("feature_staging", self.__repo, self.__data_domain)
+        self.__get_feature_group_df = lambda : _FeatureStoreDFContainer.get_df("feature_group", self.__repo, self.__data_domain)
+        self.__get_archived_feature_group_df = lambda : _FeatureStoreDFContainer.get_df("feature_group_staging", self.__repo, self.__data_domain)
+        self.__get_entity_df = lambda : _FeatureStoreDFContainer.get_df("entity", self.__repo, self.__data_domain)
+        self.__get_archived_entity_df = lambda : _FeatureStoreDFContainer.get_df("entity_staging", self.__repo, self.__data_domain)
+        self.__get_data_source_df = lambda : _FeatureStoreDFContainer.get_df("data_source", self.__repo, self.__data_domain)
+        self.__get_archived_data_source_df = lambda : _FeatureStoreDFContainer.get_df("data_source_staging", self.__repo, self.__data_domain)
+        self.__get_dataset_catalog_df = lambda : _FeatureStoreDFContainer.get_df("dataset_catalog", self.__repo, self.__data_domain)
+        self.__get_data_domain_df = lambda : _FeatureStoreDFContainer.get_df("data_domain", self.__repo, self.__data_domain)
+        self.__get_feature_process_df = lambda : _FeatureStoreDFContainer.get_df("feature_process", self.__repo, self.__data_domain)
+        self.__get_features_metadata_df = lambda : _FeatureStoreDFContainer.get_df("feature_metadata", self.__repo, self.__data_domain)
+        self.__get_feature_info_df = lambda: _FeatureStoreDFContainer.get_df("feature_info", self.__repo, self.__data_domain)
+        self.__get_dataset_features_df = lambda: _FeatureStoreDFContainer.get_df("dataset_features", self.__repo, self.__data_domain)
+        self.__get_feature_runs_df = lambda : _FeatureStoreDFContainer.get_df("feature_runs", self.__repo, self.__data_domain)
+        self.__get_without_valid_period_df = lambda df: df.drop(columns=['ValidPeriod'])
+        self.__get_feature_version = lambda: _FeatureStoreDFContainer.get_df("feature_version", self.__repo, self.__data_domain)
 
         self.__good_status = "Good"
         self.__bad_status = "Bad"
         self.__repaired_status = "Repaired"
 
+        self.__data_domain = data_domain if data_domain is not None else _get_current_databasename()
+
+        self.__repo_exists = connection.dialect._get_database_names(connection, self.__repo)
+
+        if check:
+            return self.__validate_repo_exists()
+        else:
+            # If check is False, then do not check for the existence of DB objects.
+            self.__add_data_domain()
+
+    def __validate_repo_exists(self):
+        """
+        Validate the repository.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            None
+
+        RAISES:
+            ValueError: If the repo is invalid.
+        """
+        # Check whether the repo exists or not.
+        if not self.__repo_exists:
+            print("Repo {} does not exist. Run FeatureStore.setup() " \
+                  "to create the repo and setup FeatureStore.".format(self.__repo))
+            return
+
+        # Check whether all the EFS tables exist or not.
+        existing_tabs = db_list_tables(schema_name=self.__repo, object_name='_efs%')
+        if not existing_tabs.empty:
+            existing_tables = set(existing_tabs['TableName'].tolist())
+            all_tables_exist = all(val in existing_tables for val in EFS_TABLES.values())
+        else:
+            all_tables_exist = False
+        # Check whether all the EFS triggers exist or not.
+        all_triggers_exist, num_trigger_exist = _is_trigger_exist(self.__repo, list(EFS_TRIGGERS.values()))
+
+        # Check whether all the EFS tables and triggers exist or not.
+        # If exists, then insert the data domain name into _efs_data_domain table.
+        if all_tables_exist and all_triggers_exist:
+            self.__add_data_domain()
+            # If all the tables and triggers are available, then
+            # FeatureStore is ready to use.
+            print("FeatureStore is ready to use.")
+        # All table and triggers does not exist.
+        # If the count of tables and triggers is 0, then
+        # FeatureStore is not setup.
+        elif num_trigger_exist == 0 and len(existing_tabs) == 0:
+            print("FeatureStore is not setup(). Run FeatureStore.setup() to setup FeatureStore.")
+        else:
+            print("Some of the feature store objects are missing. Run FeatureStore.repair() to create missing objects.")
+
+    @property
+    def data_domain(self):
+        """
+        DESCRIPTION:
+            Get the data domain.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            str
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            # Example 1: Use existing FeatureStore 'vfs_v1' to get the data domain.
+            >>> from teradataml import FeatureStore
+            >>> fs = FeatureStore(repo='vfs_v1', data_domain='test_domain')
+            FeatureStore is ready to use.
+            >>> fs.data_domain
+            'test_domain'
+        """
+        return self.__data_domain
+
+    @data_domain.setter
+    def data_domain(self, value):
+        """
+        DESCRIPTION:
+            Set the data domain.
+
+        PARAMETERS:
+            value:
+                Required Argument.
+                Specifies the data domain name.
+                Types: str.
+
+        RETURNS:
+            None.
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            # Example 1: Create or use existing FeatureStore for repository 'abc' and
+            #            then change the data domain to 'xyz'.
+            >>> from teradataml import FeatureStore
+            >>> fs = FeatureStore('abc')
+            FeatureStore is ready to use.
+
+            # Set the data domain to 'xyz'.
+            >>> fs.data_domain = 'xyz'
+
+            # Get the data domain.
+            >>> fs.data_domain
+            'xyz'
+        """
+        argument_validation_params = []
+        argument_validation_params.append(["value", value, False, (str), True])
+
+        # Validate argument types
+        _Validators._validate_function_arguments(argument_validation_params)
+
+        # Set the data domain value.
+        self.__data_domain = value
+        self.__add_data_domain()
+
+    def __add_data_domain(self):
+        """
+        DESCRIPTION:
+            Internal method to add the data domain.
+
+        PARAMETERS:
+            data_domain:
+                Required Argument.
+                Specifies the data domain name.
+                Types: str.
+
+        RETURNS:
+            None.
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            >>> self.__add_data_domain()
+        """
+        # Add the data domain to the EFS_DATA_DOMAINS table.
+        _insert_data(table_name=self.__table_names['data_domain'],
+                     schema_name=self.__repo,
+                     values=(self.__data_domain, dt.utcnow()),
+                     columns=["name", "created_time"],
+                     ignore_errors=[2801])
+
     @property
     def repo(self):
         """
@@ -100,11 +296,14 @@ class FeatureStore:
             None
 
         EXAMPLES:
+            # Example 1: Get the repository name from FeatureStore.
             >>> from teradataml import FeatureStore
             >>> fs = FeatureStore('vfs_v1')
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+
+            # Get the repository name.
             >>> fs.repo
-            vfs_v1
-            >>>
+            'vfs_v1'
         """
         return self.__repo
 
@@ -131,8 +330,16 @@ class FeatureStore:
             #            then change the repository to 'xyz'.
             >>> from teradataml import FeatureStore
             >>> fs = FeatureStore('abc')
+            FeatureStore is ready to use.
+
+            # Get the repository name.
+            >>> fs.repo
+            'abc'
+
+            # Set the repository to 'xyz'.
             >>> fs.repo = 'xyz'
-            >>>
+            >>> fs.repo
+            'xyz'
         """
         argument_validation_params = []
         argument_validation_params.append(["value", value, False, (str), True])
@@ -141,7 +348,12 @@ class FeatureStore:
         _Validators._validate_function_arguments(argument_validation_params)
         # remove all entries from container so they will be automatically
         # point to new repo for subsequent API's.
+        self.__repo_exists = get_connection().dialect._get_database_names(get_connection(),
+                                                                          value)
+        self.__validate_repo_exists()
+
         self.__df_container.clear()
+
         self.__version = None
 
         # Set the repo value.
@@ -160,6 +372,19 @@ class FeatureStore:
 
         RAISES:
             None
+
+        EXAMPLES:
+            >>> from teradataml import FeatureStore
+            >>> fs = FeatureStore('vfs_v1')
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+
+            # Get the string representation of FeatureStore.
+            >>> fs
+            'VantageFeatureStore(vfs_v1)-v2.0.0'
+
         """
         s = "VantageFeatureStore({})".format(self.__repo)
         try:
@@ -183,7 +408,7 @@ class FeatureStore:
             None
         """
         if not self.__version:
-            sql = "SELECT version FROM {}.{}".format(self.__repo, EFS_VERSION_SPEC["table_name"])
+            sql = "SELECT version FROM {}.{}".format(self.__repo, self.__table_names['version'])
             self.__version = next(execute_sql(sql))[0]
         return self.__version
 
@@ -203,14 +428,23 @@ class FeatureStore:
             None
 
         EXAMPLES:
-            # List down all the FeatureStore repositories.
+            >>> from teradataml import FeatureStore
+            # Example 1: List all the FeatureStore repositories using FeatureStore class.
             >>> FeatureStore.list_repos()
                 repos
             0  vfs_v1
-            >>>
+
+            # Example 2: List all the FeatureStore repositories using FeatureStore object.
+            >>> fs = FeatureStore('vfs_v1')
+            FeatureStore is ready to use.
+
+            >>> fs.list_repos()
+                repos
+            0  vfs_v1
+
         """
         return DataFrame.from_query("select distinct DataBaseName as repos from dbc.tablesV where TableName='{}'".format(
-            EFS_VERSION_SPEC["table_name"]))
+            EFS_DB_COMPONENTS['version']))
 
     def setup(self, perm_size='10e9', spool_size='10e8'):
         """
@@ -250,77 +484,74 @@ class FeatureStore:
             TeradatamlException
 
         EXAMPLES:
-            # Setup FeatureStore for repo 'vfs_v1'.
+            # Example 1: Setup FeatureStore for repository 'vfs_v1'.
             >>> from teradataml import FeatureStore
+            # Create FeatureStore for repo 'vfs_v1'.
             >>> fs = FeatureStore("vfs_v1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+
+            # Setup FeatureStore for this repository.
             >>> fs.setup()
             True
-            >>>
-        """
 
-        repo_exists = get_connection().dialect._get_database_names(
-            get_connection(), self.__repo)
+            >>> fs
+            VantageFeatureStore(vfs_v1)-v2.0.0
+
+            # Example 2: Setup FeatureStore for repository 'vfs_v2' with custom perm_size and spool_size.
+            # Create FeatureStore for repo 'vfs_v2'.
+            >>> fs = FeatureStore("vfs_v2")
+            Repo vfs_v2 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+
+            # Setup FeatureStore for this repository.
+            >>> fs.setup(perm_size='50e6', spool_size='50e6')
+            True
+
+            >>> fs
+            VantageFeatureStore(vfs_v2)-v2.0.0
 
+        """
         # If repo does not exist, then create it.
-        if not repo_exists:
+        if not self.__repo_exists:
             _create_database(self.__repo, perm_size, spool_size)
 
         # Check whether version table exists or not. If exist, assume all
         # tables are available.
         all_tables_exist = get_connection().dialect.has_table(
-            get_connection(), EFS_VERSION_SPEC['table_name'], schema=self.__repo)
+            get_connection(), self.__table_names['version'], schema=self.__repo)
 
         if not all_tables_exist:
-            # Create the tables.
-            table_specs = [EFS_FEATURES_SPEC,
-                           EFS_DATA_SOURCE_SPEC,
-                           EFS_ENTITY_SPEC,
-                           EFS_ENTITY_XREF_SPEC,
-                           EFS_FEATURE_GROUP_SPEC,
-                           EFS_GROUP_FEATURES_SPEC,
-                           EFS_VERSION_SPEC]
-
-            staging_table_specs = [
-                EFS_FEATURES_STAGING_SPEC,
-                EFS_DATA_SOURCE_STAGING_SPEC,
-                EFS_ENTITY_STAGING_SPEC,
-                EFS_ENTITY_XREF_STAGING_SPEC,
-                EFS_GROUP_FEATURES_STAGING_SPEC,
-                EFS_FEATURE_GROUP_STAGING_SPEC
-            ]
-
-            triggers_specs = [
-                EFS_FEATURES_TRG,
-                EFS_GROUP_FEATURES_TRG,
-                EFS_FEATURE_GROUP_TRG,
-                EFS_DATA_SOURCE_TRG,
-                EFS_ENTITY_TRG,
-                EFS_ENTITY_XREF_TRG
-            ]
-
-            for table_spec in table_specs + staging_table_specs:
-                params_ = {"table_name": table_spec["table_name"],
-                           "columns": table_spec["columns"],
-                           "primary_index": table_spec.get("primary_index"),
-                           "unique": True if table_spec.get("primary_index") else False,
-                           "schema_name": self.__repo,
-                           "set_table": False
-                           }
-                if "foreign_keys" in table_spec:
-                    params_["foreign_key_constraint"] = table_spec.get("foreign_keys")
-
-                _create_table(**params_)
-
-            for trigger_spec in triggers_specs:
-                execute_sql(trigger_spec.format(schema_name=self.__repo))
+            # Create the object tables.
+            for table_spec, table_name in EFS_TABLES.items():
+                execute_sql(table_spec.format(self.__repo, table_name))
+            # Create the Triggers.
+            for trigger_spec, trg_name in EFS_TRIGGERS.items():
+                alter_name = trg_name.split('_trg')[0]
+                insert_name = self.__repo+'.'+alter_name+'_staging'
+                execute_sql(trigger_spec.format(self.__repo, trg_name,
+                                                alter_name, insert_name))
+
+            # Create feature versions view.
+            sql = EFS_FEATURE_VERSION.format(self.__repo,
+                                             EFS_DB_COMPONENTS['feature_version'],
+                                             self.__repo,
+                                             self.__table_names['feature_process']
+                                             )
+            execute_sql(sql)
 
             # After the setup is done, populate the version.
-            insert_model = "insert into {}.{} values (?, ?);".format(self.__repo, EFS_VERSION_SPEC["table_name"])
-            execute_sql(insert_model, (EFS_VERSION, datetime.datetime.now()))
+            insert_model = "insert into {}.{} values (?, ?);".format(self.__repo, self.__table_names['version'])
+            execute_sql(insert_model, (EFS_VERSION_, datetime.datetime.now()))
 
-        if repo_exists and all_tables_exist:
+        # Create the data domain in _efs_data_domain table.
+        self.__add_data_domain()
+
+        if self.__repo_exists and all_tables_exist:
             print("EFS is already setup for the repo {}.".format(self.__repo))
 
+        # Set the repo_exists to True
+        self.__repo_exists = True
+        return True
+
     @property
     def grant(self):
         """
@@ -340,8 +571,10 @@ class FeatureStore:
 
         EXAMPLES:
             >>> from teradataml import FeatureStore
-            # Create FeatureStore for repo 'vfs_v1'.
-            >>> fs = FeatureStore("vfs_v1")
+            # Create FeatureStore for repo 'vfs_v2'.
+            >>> fs = FeatureStore("vfs_v2")
+            Repo vfs_v2 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+
             # Setup FeatureStore for this repository.
             >>> fs.setup()
             True
@@ -359,9 +592,8 @@ class FeatureStore:
             True
 
         """
-        table_names = {name: UtilFuncs._get_qualified_table_name(self.__repo, table_name)
-                       for name, table_name in EFS_TABLES.items()}
-        return Grant(list(table_names.values()))
+        return Grant(objects=AccessQueries,
+                     database=self.__repo)
 
     @property
     def revoke(self):
@@ -384,6 +616,8 @@ class FeatureStore:
             >>> from teradataml import FeatureStore
             # Create FeatureStore for repo 'vfs_v1'.
             >>> fs = FeatureStore("vfs_v1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+
             # Setup FeatureStore for this repository.
             >>> fs.setup()
             True
@@ -400,9 +634,8 @@ class FeatureStore:
             >>> fs.revoke.read_write('BoB')
             True
         """
-        table_names = {name: UtilFuncs._get_qualified_table_name(self.__repo, table_name)
-                       for name, table_name in EFS_TABLES.items()}
-        return Revoke(list(table_names.values()))
+        return Revoke(objects=AccessQueries,
+                      database=self.__repo)
 
     def repair(self):
         """
@@ -422,134 +655,110 @@ class FeatureStore:
             bool
 
         RAISES:
-            TeradatamlException
+            None
 
         EXAMPLES:
-            # Repair FeatureStore repo 'vfs_v1'.
+            # Example 1: Repair FeatureStore repo 'vfs_v1'.
+            # Create FeatureStore for repo 'vfs_v1'.
             >>> from teradataml import FeatureStore
             >>> fs = FeatureStore("vfs_v1")
-            >>> fs.repair()
-            True
-            >>>
-        """
-
-        # Repair Features, Entities and DataSources first. Then FeatureGroup and then Group Features.
-        group_features_ = [EFS_GROUP_FEATURES_STAGING_SPEC, EFS_GROUP_FEATURES_SPEC, EFS_GROUP_FEATURES_TRG, "GroupFeatures"]
-        feature_group_ = [EFS_FEATURE_GROUP_STAGING_SPEC, EFS_FEATURE_GROUP_SPEC, EFS_FEATURE_GROUP_TRG, "FeatureGroup"]
-        featuers_ = [EFS_FEATURES_STAGING_SPEC, EFS_FEATURES_SPEC, EFS_FEATURES_TRG, "Feature"]
-        entities_ = [EFS_ENTITY_STAGING_SPEC, EFS_ENTITY_SPEC, EFS_ENTITY_TRG, "Entity"]
-        entities_xref_ = [EFS_ENTITY_XREF_STAGING_SPEC, EFS_ENTITY_XREF_SPEC, EFS_ENTITY_XREF_TRG, "EntityXref"]
-        data_sources_ = [EFS_DATA_SOURCE_STAGING_SPEC, EFS_DATA_SOURCE_SPEC, EFS_DATA_SOURCE_TRG, "DataSource"]
-
-
-        for staging_table_, table_, trigger, obj_name in (group_features_, feature_group_, featuers_, entities_, entities_xref_, data_sources_):
-            status = []
-            print("Repairing objects related to {}.".format(obj_name))
-
-            status.append(self.__try_create_table(staging_table_))
-            status.append(self.__try_create_table(table_))
-            status.append(self.__try_create_trigger(trigger, "{}_trg".format(table_["table_name"])))
-
-            # Let user know about status.
-            # If any of the status is Bad, then repair is failed.
-            # Else, If any of the status is Repaired, then sucessfully repaired.
-            # Else no need to repair the object.
-            if self.__bad_status in status:
-                print("Unable to repair objects related to {}.".format(obj_name))
-            else:
-                if self.__repaired_status in status:
-                    print("Successfully repaired objects related to {}.".format(obj_name))
-                else:
-                    print("{} objects are good and do not need any repair.".format(obj_name))
-
-        # Repair the version table.
-        status = self.__try_create_table(EFS_VERSION_SPEC)
-        if status == self.__repaired_status:
-            # After the setup is done, populate the version.
-            insert_model = "insert into {}.{} values (?, ?);".format(self.__repo, EFS_VERSION_SPEC["table_name"])
-            execute_sql(insert_model, (EFS_VERSION, datetime.datetime.now()))
-
-        return True
-
-    def __try_create_table(self, table_spec):
-        """
-        DESCRIPTION:
-            Internal function to create a table from table spec.
-
-        PARAMETERS:
-            table_spec:
-                Required Argument.
-                Specifies the spec for the corresponding table.
-                Types: dict
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
 
-        RETURNS:
-            str
-            Note:
-                Method can return three different values of strings.
-                * Good - When table to create already exists.
-                * Repaired - When is created.
-                * Bad - When table not exists and method unable to create table.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
 
-        RAISES:
-            None
+            # Drop the data_source_staging table to simulate the missing object.
+            >>> from teradataml import db_drop_table
+            >>> db_drop_table(schema_name='vfs_v1', table_name=EFS_DB_COMPONENTS['data_source_staging'])
 
-        EXAMPLES:
-            self.__try_create_table(EFS_VERSION_SPEC)
-        """
-        try:
-            _create_table(table_spec["table_name"],
-                          columns=table_spec["columns"],
-                          primary_index=table_spec.get("primary_index"),
-                          unique=True if table_spec.get("primary_index") else False,
-                          schema_name=self.__repo,
-                          set_table=False)
-            return self.__repaired_status
-        except Exception as e:
-            if "Table '{}' already exists".format(table_spec["table_name"]) in str(e):
-                return self.__good_status
-            else:
-                print(str(e))
-                return self.__bad_status
+            # Verify the missing object by creating FeatureStore again.
+            >>> fs = FeatureStore("vfs_v1")
+            Some of the feature store objects are missing. Run FeatureStore.repair() to create missing objects.
 
-    def __try_create_trigger(self, trigger_spec, trigger_name):
+            >>> fs.repair()
+            Successfully repaired the following objects: _efs_data_source_staging
+            True
         """
-        DESCRIPTION:
-            Internal function to create trigger.
-
-        PARAMETERS:
-            trigger_spec:
-                Required Argument.
-                Specifies the spec for the corresponding trigger.
-                Types: str
-
-            trigger_name:
-                Required Argument.
-                Specifies the name of the trigger to create.
-                Types: str
-
-        RETURNS:
-            str
-            Note:
-                Method can return three different values of strings.
-                * Good - When trigger to create already exists.
-                * Repaired - When trigger is created.
-                * Bad - When trigger not exists and method unable to create trigger.
+        # Check whether the repo exists or not.
+        if not self.__repo_exists:
+            print("Repo '{}' does not exist. Run FeatureStore.setup() " \
+                  "to create the repo and setup FeatureStore.".format(self.__repo))
+            return False
+
+        # Get all existing EFS tables in the repo
+        existing_tabs = db_list_tables(schema_name=self.__repo, object_name='_efs%')
+        existing_tables = set(existing_tabs['TableName'].tolist())
+
+        # Get non-existing tables in the order of EFS_TABLES.values()
+        non_existing_tables = {
+            table_spec: table_name
+            for table_spec, table_name in EFS_TABLES.items()
+            if table_name not in existing_tables
+        }
+
+        # Get all existing EFS triggers in the repo
+        sql = SQLBundle()._get_sql_query(SQLConstants.SQL_LIST_TRIGGERS).format(self.__repo, '_efs%')
+        existing_triggers = {row[0] for row in execute_sql(sql).fetchall()}
+
+        # Get non-existing triggers in the order of EFS_TRIGGERS.values()
+        non_existing_triggers = {
+            trigger_spec: trigger_name
+            for trigger_spec, trigger_name in EFS_TRIGGERS.items()
+            if trigger_name not in existing_triggers
+        }
+
+        # Check if feature_version view exists (it shows up in existing_tables from db_list_tables)
+        feature_version_exists = self.__table_names['feature_version'] in existing_tables
+
+        # Return False only if all tables, triggers, and views exist
+        if not non_existing_tables and not non_existing_triggers and feature_version_exists:
+            print("repo '{}' is ready to use and do not need any repair.".format(self.__repo))
+            return False
 
-        RAISES:
-            None
+        failed_creation = []
+        created = []
+        # Iterating over EFS_TABLES based on the non-existing tables
+        for table_spec, table_name in non_existing_tables.items():
+            try:
+                execute_sql(table_spec.format(self.__repo, table_name))
+                created.append(table_name)
+            except Exception as e:
+                # If any table creation fails, then add it to the failed list
+                failed_creation.append((f"Table '{table_name}'", str(e)))
+
+        # Iterating over EFS_TRIGGERS based on the non-existing triggers
+        for trigger_spec, trigger_name in non_existing_triggers.items():
+            alter_name = trigger_name.split('_trg')[0]
+            insert_name = self.__repo + '.' + alter_name + '_staging'
+            try:
+                execute_sql(trigger_spec.format(self.__repo, trigger_name,
+                                                alter_name, insert_name))
+                created.append(trigger_name)
+            except Exception as e:
+                # If any trigger creation fails, then add it to the failed list
+                failed_creation.append((f"Trigger '{trigger_name}'", str(e)))
+
+        # Create feature versions view if it doesn't exist
+        if not feature_version_exists:
+            try:
+                sql = EFS_FEATURE_VERSION.format(self.__repo,
+                                                 EFS_DB_COMPONENTS['feature_version'],
+                                                 self.__repo,
+                                                 self.__table_names['feature_process'])
+                execute_sql(sql)
+                created.append(EFS_DB_COMPONENTS['feature_version'])
+            except Exception as e:
+                failed_creation.append((f"View '{EFS_DB_COMPONENTS['feature_version']}'", str(e)))
+
+        # If any of the table or trigger creation fails, then return False
+        if failed_creation:
+            print("The following objects could not be repaired:")
+            for obj, reason in failed_creation:
+                print(f" - {obj}: {reason}")
+            return False
 
-        EXAMPLES:
-            self.__try_create_trigger(EFS_FEATURE_TRIGGER_SPEC)
-        """
-        try:
-            execute_sql(trigger_spec.format(schema_name=self.__repo))
-            return self.__repaired_status
-        except Exception as e:
-            if "Trigger '{}' already exists".format(trigger_name) in str(e):
-                return self.__good_status
-            else:
-                print("Unable to create trigger '{}'. Error - {}".format(trigger_name, str(e)))
-                return self.__bad_status
+        print("Successfully repaired the following objects: {}".format(", ".join(created)))
+        return True
 
     def list_features(self, archived=False) -> DataFrame:
         """
@@ -573,38 +782,45 @@ class FeatureStore:
 
         EXAMPLES:
             >>> from teradataml import DataFrame, FeatureStore, load_example_data
-            >>> load_example_data('dataframe', 'sales')
-            # Create FeatureStore for repo 'vfs_v1'.
-            >>> fs = FeatureStore("vfs_v1")
             # Create teradataml DataFrame.
+            >>> load_example_data("dataframe", "sales")
             >>> df = DataFrame("sales")
+
+            # Create FeatureStore for repo 'vfs_v1'.
+            >>> fs = FeatureStore("vfs_v1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
             # Create a FeatureGroup from teradataml DataFrame.
             >>> fg = FeatureGroup.from_DataFrame(name='sales',
             ...                                  entity_columns='accounts',
             ...                                  df=df,
-            ...                                  timestamp_col_name='datetime')
+            ...                                  timestamp_column='datetime')
             # Apply the FeatureGroup to FeatureStore.
             >>> fs.apply(fg)
             True
 
             # Example 1: List all the effective Features in the repo 'vfs_v1'.
             >>> fs.list_features()
-                 column_name description               creation_time modified_time  tags data_type feature_type status group_name
-            name
-            Mar          Mar        None  2024-09-30 11:21:43.314118          None  None    BIGINT   CONTINUOUS ACTIVE      sales
-            Jan          Jan        None  2024-09-30 11:21:42.655343          None  None    BIGINT   CONTINUOUS ACTIVE      sales
-            Apr          Apr        None  2024-09-30 11:21:44.143402          None  None    BIGINT   CONTINUOUS ACTIVE      sales
-            Feb          Feb        None  2024-09-30 11:21:41.542627          None  None     FLOAT   CONTINUOUS ACTIVE      sales
-            >>>
+                              id column_name description  tags data_type feature_type status               creation_time modified_time group_name
+            name data_domain
+            Apr  ALICE          4         Apr        None  None    BIGINT   CONTINUOUS ACTIVE  2025-07-28 03:17:31.262501          None      sales
+            Jan  ALICE          2         Jan        None  None    BIGINT   CONTINUOUS ACTIVE  2025-07-28 03:17:30.056273          None      sales
+            Mar  ALICE          3         Mar        None  None    BIGINT   CONTINUOUS ACTIVE  2025-07-28 03:17:30.678060          None      sales
+            Feb  ALICE          1         Feb        None  None     FLOAT   CONTINUOUS ACTIVE  2025-07-28 03:17:29.403242          None      sales
 
             # Example 2: List all the archived Features in the repo 'vfs_v1'.
             # Note: Feature can only be archived when it is not associated with any Group.
             # Let's remove Feature 'Feb' from FeatureGroup.
-            >>> fg.remove(fs.get_feature('Feb'))
+            >>> fg.remove_feature(fs.get_feature('Feb'))
             True
+
             # Apply the modified FeatureGroup to FeatureStore.
             >>> fs.apply(fg)
             True
+
             # Archive Feature 'Feb'.
             >>> fs.archive_feature('Feb')
             Feature 'Feb' is archived.
@@ -612,8 +828,8 @@ class FeatureStore:
 
             # List all the archived Features in the repo 'vfs_v1'.
             >>> fs.list_features(archived=True)
-              name column_name description               creation_time modified_time  tags data_type feature_type status               archived_time group_name
-            0  Feb         Feb        None  2024-09-30 11:21:41.542627          None  None     FLOAT   CONTINUOUS ACTIVE  2024-09-30 11:30:49.160000       sales
+               id name data_domain column_name description  tags data_type feature_type status               creation_time modified_time               archived_time group_name
+            0   1  Feb       ALICE         Feb        None  None     FLOAT   CONTINUOUS ACTIVE  2025-07-28 03:17:29.403242          None  2025-07-28 03:19:58.950000       sales
             >>>
         """
         return self.__get_archived_features_df() if archived else self.__get_features_df()
@@ -640,25 +856,31 @@ class FeatureStore:
 
         EXAMPLES:
             >>> from teradataml import DataFrame, FeatureStore, load_example_data
-            >>> load_example_data('dataframe', 'sales')
             # Create FeatureStore for repo 'vfs_v1'.
             >>> fs = FeatureStore("vfs_v1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
             # Create teradataml DataFrame.
+            >>> load_example_data("dataframe", "sales")
             >>> df = DataFrame("sales")
+
             # Create a FeatureGroup from teradataml DataFrame.
             >>> fg = FeatureGroup.from_DataFrame(name='sales',
             ...                                  entity_columns='accounts',
             ...                                  df=df,
-            ...                                  timestamp_col_name='datetime')
+            ...                                  timestamp_column='datetime')
             # Apply the FeatureGroup to FeatureStore.
             >>> fs.apply(fg)
             True
 
             # Example 1: List all the effective Entities in the repo 'vfs_v1'.
             >>> fs.list_entities()
-                                description
-            name  entity_column
-            sales accounts             None
+                              description               creation_time               modified_time entity_column
+            name  data_domain
+            sales ALICE              None  2025-07-28 03:17:31.558796  2025-07-28 03:19:41.233953      accounts
             >>>
 
             # Example 2: List all the archived Entities in the repo 'vfs_v1'.
@@ -668,10 +890,13 @@ class FeatureStore:
             >>> entity = Entity('store_sales', columns=df.accounts)
             # Update new entity to FeatureGroup.
             >>> fg.apply(entity)
+            True
+
             # Update FeatureGroup to FeatureStore. This will update Entity
             # from 'sales' to 'store_sales' for FeatureGroup 'sales'.
             >>> fs.apply(fg)
             True
+
             # Let's archive Entity 'sales' since it is not part of any FeatureGroup.
             >>> fs.archive_entity('sales')
             Entity 'sales' is archived.
@@ -680,8 +905,9 @@ class FeatureStore:
 
             # List the archived entities.
             >>> fs.list_entities(archived=True)
-                name description               creation_time modified_time               archived_time entity_column
-            0  sales        None  2024-10-18 05:41:36.932856          None  2024-10-18 05:50:00.930000      accounts
+                                     description               creation_time modified_time entity_column
+            name        data_domain
+            store_sales ALICE              None  2025-07-28 03:23:40.322424          None      accounts
             >>>
         """
         return self.__get_archived_entity_df() if archived else self.__get_entity_df()
@@ -708,11 +934,17 @@ class FeatureStore:
 
         EXAMPLES:
             >>> from teradataml import DataSource, FeatureStore, load_example_data
-            >>> load_example_data("dataframe", "admissions_train")
             # Create teradataml DataFrame.
-            >>> admissions=DataFrame("admissions_train")
+            >>> load_example_data("dataframe", "admissions_train")
+            >>> admissions = DataFrame("admissions_train")
+
             # Create FeatureStore for repo 'vfs_v1'.
             >>> fs = FeatureStore("vfs_v1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
             # Create DataSource using teradataml DataFrame.
             >>> ds = DataSource(name='admissions', source=admissions)
             # Apply the DataSource to FeatureStore.
@@ -721,21 +953,20 @@ class FeatureStore:
 
             # Example 1: List all the effective DataSources in the repo 'vfs_v1'.
             >>> fs.list_data_sources()
-                       description timestamp_col_name                            source
-            name
-            admissions        None               None  select * from "admissions_train"
-            >>>
+                                   description timestamp_column                            source               creation_time modified_time
+            name       data_domain
+            admissions ALICE              None             None  select * from "admissions_train"  2025-07-28 03:26:53.507807          None
 
             # Example 2: List all the archived DataSources in the repo 'vfs_v1'.
             # Let's first archive the DataSource.
             >>> fs.archive_data_source('admissions')
             DataSource 'admissions' is archived.
             True
+
             # List archived DataSources.
             >>> fs.list_data_sources(archived=True)
-                       description timestamp_col_name                            source               archived_time
-            name
-            admissions        None               None  select * from "admissions_train"  2024-09-30 12:05:39.220000
+                     name data_domain description timestamp_column                            source               creation_time modified_time               archived_time
+            0  admissions       ALICE        None             None  select * from "admissions_train"  2025-07-28 03:26:53.507807          None  2025-07-28 03:28:17.160000
             >>>
         """
         return self.__get_archived_data_source_df() if archived else self.__get_data_source_df()
@@ -762,11 +993,17 @@ class FeatureStore:
 
         EXAMPLES:
             >>> from teradataml import FeatureGroup, FeatureStore, load_example_data
-            >>> load_example_data("dataframe", "admissions_train")
             # Create teradataml DataFrame.
+            >>> load_example_data("dataframe", "admissions_train")
             >>> admissions=DataFrame("admissions_train")
+
             # Create FeatureStore for repo 'vfs_v1'.
             >>> fs = FeatureStore("vfs_v1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
             # Create a FeatureGroup from DataFrame.
             >>> fg = FeatureGroup.from_DataFrame("admissions", df=admissions, entity_columns='id')
             # Apply FeatureGroup to FeatureStore.
@@ -775,60 +1012,285 @@ class FeatureStore:
 
             # Example 1: List all the effective FeatureGroups in the repo 'vfs_v1'.
             >>> fs.list_feature_groups()
-                       description data_source_name entity_name
-            name
-            admissions        None       admissions  admissions
-            >>>
+                                   description data_source_name entity_name               creation_time modified_time
+            name       data_domain
+            admissions ALICE              None       admissions  admissions  2025-07-28 03:30:04.115331          None
 
             # Example 2: List all the archived FeatureGroups in the repo 'vfs_v1'.
             # Let's first archive the FeatureGroup.
             >>> fs.archive_feature_group("admissions")
             True
-            >>>
+
             # List archived FeatureGroups.
             >>> fs.list_feature_groups(archived=True)
-                     name description data_source_name entity_name               archived_time
-            0  admissions        None       admissions  admissions  2024-09-30 12:05:39.220000
+                     name data_domain description data_source_name entity_name               creation_time modified_time               archived_time
+            0  admissions       ALICE        None       admissions  admissions  2025-07-28 03:30:04.115331          None  2025-07-28 03:31:04.550000
             >>>
         """
         return self.__get_archived_feature_group_df() if archived else self.__get_feature_group_df()
 
-    def get_feature(self, name):
+    def list_data_domains(self) -> DataFrame:
         """
         DESCRIPTION:
-            Retrieve the feature.
+            Lists all the data domains.
 
         PARAMETERS:
-            name:
-                Required Argument.
-                Specifies the name of the feature to get.
-                Types: str
+            None
 
         RETURNS:
-            Feature.
+            teradataml DataFrame
 
         RAISES:
-            TeradataMLException
+            None
 
         EXAMPLES:
-            >>> from teradataml import DataFrame, FeatureStore, load_example_data
-            # Load the sales data to Vantage.
-            >>> load_example_data("dataframe", "sales")
-            # Create DataFrame on sales data.
-            >>> df = DataFrame("sales")
-            >>> df
-                          Feb    Jan    Mar    Apr    datetime
-            accounts
+            # Example 1: List all the data domains in the repo 'vfs_v1'.
+            >>> from teradataml import FeatureStore
+            # Create FeatureStore for repo 'vfs_v1' with data_domain 'd1'.
+            >>> fs = FeatureStore("vfs_v1", data_domain='d1')
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+
+            # List all the data domains in the repo 'vfs_v1'.
+            >>> fs.list_data_domains()
+              name                created_time
+            0   d1  2025-04-30 11:21:40.123456
+        """
+        return self.__get_data_domain_df()
+
+    def list_feature_processes(self, archived=False) -> DataFrame:
+        """
+        DESCRIPTION:
+            Lists all the feature processes.
+
+        PARAMETERS:
+            archived:
+                Optional Argument.
+                Specifies whether to retrieve archived feature processes or not.
+                When set to True, archived feature processes in FeatureStore are listed.
+                Otherwise, all feature processes are listed.
+                Default Value: False
+                Types: bool
+
+        RETURNS:
+            teradataml DataFrame
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            # Example 1: List all the feature processes in the repo 'vfs_v1'.
+            >>> from teradataml import FeatureStore
+
+            # Create FeatureStore 'vfs_v1' or use existing one.
+            >>> fs = FeatureStore("vfs_v1")
+            FeatureStore is ready to use.
+
+            # Load the sales data.
+            >>> load_example_data("dataframe", "sales")
+            >>> df = DataFrame("sales")
+
+            # Create a feature process.
+            >>> from teradataml import FeatureProcess
+            >>> fp = FeatureProcess(repo="vfs_v1",
+            ...                     data_domain='sales',
+            ...                     object=df,
+            ...                     entity="accounts",
+            ...                     features=["Jan", "Feb", "Mar", "Apr"])
+            >>> fp.run()
+            Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' started.
+            Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' completed.
+
+            # List all the feature processes in the repo 'vfs_v1'.
+            >>> fs.list_feature_processes()
+                                                 description data_domain       process_type data_source entity_id       feature_names feature_ids                     valid_start                       valid_end
+            process_id
+            5747082b-4acb-11f0-a2d7-f020ffe7fe09        None       sales  denormalized view     "sales"  accounts  Apr, Feb, Jan, Mar        None  2025-06-16 16:02:55.260000+00:  9999-12-31 23:59:59.999999+00:
+
+            # Example 2: List all the archived feature processes in the repo 'vfs_v1'.
+
+            # Let's check the archived feature processes before archiving feature process.
+            >>> fs.list_feature_processes(archived=True)
+            process_id start_time end_time status filter as_of_start as_of_end failure_reason
+
+            # Archive the feature process by passing the process_id.
+            >>> fs.archive_feature_process('5747082b-4acb-11f0-a2d7-f020ffe7fe09')
+            Feature 'Feb' is archived from table 'FS_T_6003dc24_375e_7fd6_46f0_eeb868305c4a'.
+            Feature 'Feb' is archived from metadata.
+            Feature 'Jan' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
+            Feature 'Jan' is archived from metadata.
+            Feature 'Mar' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
+            Feature 'Mar' is archived from metadata.
+            Feature 'Apr' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
+            Feature 'Apr' is archived from metadata.
+            FeatureProcess with process id '5747082b-4acb-11f0-a2d7-f020ffe7fe09' is archived.
+            True
+
+            # List all the archived feature processes in the repo 'vfs_v1'.
+            >>> fs.list_feature_processes(archived=True)
+                                                 description data_domain       process_type data_source entity_id       feature_names feature_ids                     valid_start                       valid_end
+            process_id
+            5747082b-4acb-11f0-a2d7-f020ffe7fe09        None       sales  denormalized view     "sales"  accounts  Apr, Feb, Jan, Mar        None  2025-06-16 16:02:55.260000+00:  2025-06-16 16:04:32.260000+00:
+
+        """
+        validate_params = []
+        validate_params.append(["archived", archived, True, bool, True])
+        # Validate argument types
+        _Validators._validate_function_arguments(validate_params)
+
+        f_process_df = self.__get_without_valid_period_df(self.__get_feature_process_df())
+        f_process_df = f_process_df[f_process_df.data_domain == self.__data_domain]
+
+        if archived:
+            # Filter out the active feature process. Only archived features are returned.
+            f_process_df = f_process_df[(Col("valid_end") <= Col('current_timestamp'))]
+
+        return f_process_df
+
+    def list_feature_runs(self):
+        """
+        DESCRIPTION:
+            Lists all the feature runs in the FeatureStore.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            teradataml DataFrame
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            # Example 1: List all the feature runs in the repo 'vfs_v1'.
+            >>> from teradataml import FeatureStore
+
+            # Create a FeatureStore 'vfs_v1' or use existing one.
+            >>> fs = FeatureStore("vfs_v1")
+            FeatureStore is ready to use.
+
+            # Load the sales data.
+            >>> load_example_data("dataframe", "sales")
+            >>> df = DataFrame("sales")
+
+            # Create a feature process.
+            >>> from teradataml import FeatureProcess
+            >>> fp = FeatureProcess(repo="vfs_v1",
+            ...                     data_domain='test_domain',
+            ...                     object=df,
+            ...                     entity='accounts',
+            ...                     features=['Mar', 'Apr'])
+            >>> fp.run(filters=[df.accounts=='Alpha Co', "accounts='Jones LLC'"])
+            Process '11b62599-692f-11f0-ad19-f020ffe7fe09' started.
+            Ingesting the features for filter 'accounts = 'Alpha Co'' to catalog.
+            Ingesting the features for filter 'accounts='Jones LLC'' to catalog.
+            Process '11b62599-692f-11f0-ad19-f020ffe7fe09' completed.
+            True
+
+            # List all the feature runs in the repo 'vfs_v1'.
+            >>> fs.list_feature_runs()
+                                              process_id  data_domain                  start_time                    end_time     status                                       filter as_of_start as_of_end failure_reason
+            run_id
+            1       11b62599-692f-11f0-ad19-f020ffe7fe09  test_domain  2025-07-25 08:12:13.001968  2025-07-25 08:12:13.001968  completed  accounts = 'Alpha Co', accounts='Jones LLC'        None      None           None
+        """
+        return self.__get_feature_runs_df()
+
+    def list_dataset_catalogs(self) -> DataFrame:
+        """
+        DESCRIPTION:
+            Lists all the dataset catalogs.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            teradataml DataFrame
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            # Example 1: List all the dataset catalogs in the repo 'vfs_v1'.
+            >>> from teradataml import FeatureStore
+
+            # Create FeatureStore 'vfs_v1' or use existing one.
+            >>> fs = FeatureStore("vfs_v1", data_domain='sales')
+            FeatureStore is ready to use.
+
+            # Load the sales data.
+            >>> load_example_data("dataframe", "sales")
+            >>> df = DataFrame("sales")
+
+            # Create a feature process.
+            >>> from teradataml import FeatureProcess
+            >>> fp = FeatureProcess(repo="vfs_v1",
+            ...                     data_domain='sales',
+            ...                     object=df,
+            ...                     entity="accounts",
+            ...                     features=["Jan", "Feb", "Mar", "Apr"])
+            >>> fp.run()
+            Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' started.
+            Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' completed.
+
+            # create a dataset catalog.
+            >>> from teradataml import DatasetCatalog
+            >>> dc = DatasetCatalog(repo='vfs_v1', data_domain='sales')
+            >>> dataset = dc.build_dataset(entity='accounts',
+            ...                            selected_features = {
+            ...                                'Jan': '5747082b-4acb-11f0-a2d7-f020ffe7fe09',
+            ...                                'Feb': '5747082b-4acb-11f0-a2d7-f020ffe7fe09'},
+            ...                            view_name='ds_jan_feb',
+            ...                            description='Dataset with Jan and Feb features')
+
+            # List all the dataset catalogs in the repo 'vfs_v1'.
+            >>> fs.list_dataset_catalogs()
+                                                 data_domain        name entity_name database_name                        description                     valid_start                       valid_end
+            id
+            4f763a7b-8920-448c-87af-432e7d36c9cb       sales  ds_jan_feb    accounts        vfs_v1  Dataset with Jan and Feb features  2025-06-16 16:15:17.577637+00:  9999-12-31 23:59:59.999999+00:
+        """
+        return self.__get_without_valid_period_df(self.__get_dataset_catalog_df())
+
+    def get_feature(self, name):
+        """
+        DESCRIPTION:
+            Retrieve the feature.
+
+        PARAMETERS:
+            name:
+                Required Argument.
+                Specifies the name of the feature to get.
+                Types: str
+
+        RETURNS:
+            Feature.
+
+        RAISES:
+            TeradataMLException
+
+        EXAMPLES:
+            >>> from teradataml import DataFrame, FeatureStore, load_example_data
+            # Create DataFrame on sales data.
+            >>> load_example_data("dataframe", "sales")
+            >>> df = DataFrame("sales")
+            >>> df
+                          Feb    Jan    Mar    Apr    datetime
+            accounts
             Orange Inc  210.0    NaN    NaN  250.0  04/01/2017
             Jones LLC   200.0  150.0  140.0  180.0  04/01/2017
             Blue Inc     90.0   50.0   95.0  101.0  04/01/2017
             Alpha Co    210.0  200.0  215.0  250.0  04/01/2017
             Yellow Inc   90.0    NaN    NaN    NaN  04/01/2017
-            >>>
+
+            # Create a FeatureStore for repo 'vfs_v1'.
+            >>> fs = FeatureStore("vfs_v1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
             # Create Feature for column 'Mar' with name 'sales_mar'.
             >>> feature = Feature('sales_mar', column=df.Mar)
+
             # Apply the Feature to FeatureStore.
-            >>> fs = FeatureStore("vfs_v1")
             >>> fs.apply(feature)
             True
 
@@ -836,7 +1298,6 @@ class FeatureStore:
836
1298
  >>> feature = fs.get_feature('sales_mar')
837
1299
  >>> feature
838
1300
  Feature(name=sales_mar)
839
- >>>
840
1301
  """
841
1302
  argument_validation_params = []
842
1303
  argument_validation_params.append(["name", name, False, (str), True])
@@ -844,16 +1305,24 @@ class FeatureStore:
844
1305
  # Validate argument types
845
1306
  _Validators._validate_function_arguments(argument_validation_params)
846
1307
 
847
- df = self.list_features()
848
- df = df[df.name == name]
1308
+ # Check if the feature exists in the current data domain.
1309
+ df = self.__get_features_wog_df()
1310
+ df = df[(df['name'] == name) &
1311
+ (df['data_domain'] == self.__data_domain)]
849
1312
 
850
- # Check if a feature with that name exists or not. If not, raise error.
1313
+ # If no records found, check if the feature exists in any other domain.
851
1314
  if df.shape[0] == 0:
852
- msg_code = MessageCodes.FUNC_EXECUTION_FAILED
853
- error_msg = Messages.get_message(
854
- msg_code, "get_feature()", "Feature with name '{}' does not exist.".format(name))
1315
+ res = _FSUtils._get_data_domains(self.__repo, name, 'feature')
1316
+ if res:
1317
+ msg_code = MessageCodes.EFS_OBJECT_IN_OTHER_DOMAIN
1318
+ error_msg = Messages.get_message(msg_code, "Feature", "name '{}'".format(name),
1319
+ self.__data_domain, res)
1320
+ else:
1321
+ msg_code = MessageCodes.EFS_OBJECT_NOT_EXIST
1322
+ error_msg = Messages.get_message(msg_code, "Feature", "name '{}'".format(name),
1323
+ self.__data_domain)
855
1324
  raise TeradataMlException(error_msg, msg_code)
856
-
1325
+
857
1326
  return Feature._from_df(df)
858
1327
 
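+ # A minimal sketch of the domain-aware lookup above (domain names are
+ # hypothetical): a feature registered under another data domain raises
+ # EFS_OBJECT_IN_OTHER_DOMAIN, naming the domain(s) that hold it, rather
+ # than a generic not-found error.
+ #     fs = FeatureStore("vfs_v1", data_domain="marketing")
+ #     fs.get_feature("sales_mar")              # raises TeradataMlException
+ #     FeatureStore("vfs_v1").get_feature("sales_mar")   # default domain succeeds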
859
1328
  def get_group_features(self, group_name):
@@ -875,11 +1344,10 @@ class FeatureStore:
875
1344
 
876
1345
  EXAMPLES:
877
1346
  >>> from teradataml import DataFrame, FeatureStore, load_example_data
878
- # Load the sales data to Vantage.
879
- >>> load_example_data("dataframe", "sales")
1347
+
880
1348
  # Create DataFrame on sales data.
1349
+ >>> load_example_data("dataframe", "sales")
881
1350
  >>> df = DataFrame("sales")
882
- >>> df
883
1351
  >>> df
884
1352
  Feb Jan Mar Apr datetime
885
1353
  accounts
@@ -888,12 +1356,18 @@ class FeatureStore:
888
1356
  Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
889
1357
  Alpha Co 210.0 200.0 215.0 250.0 04/01/2017
890
1358
  Yellow Inc 90.0 NaN NaN NaN 04/01/2017
891
- >>>
1359
+
1360
+ # Create FeatureStore for repo 'vfs_v1'.
1361
+ >>> fs = FeatureStore("vfs_v1")
1362
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
1363
+ # Setup FeatureStore for this repository.
1364
+ >>> fs.setup()
1365
+ True
1366
+
892
1367
  # Create FeatureGroup with name 'sales' from DataFrame.
893
1368
  >>> fg = FeatureGroup.from_DataFrame(
894
- ... name="sales", df=df, entity_columns="accounts", timestamp_col_name="datetime")
1369
+ ... name="sales", df=df, entity_columns="accounts", timestamp_column="datetime")
895
1370
  # Apply the FeatureGroup to FeatureStore.
896
- >>> fs = FeatureStore("vfs_v1")
897
1371
  >>> fs.apply(fg)
898
1372
  True
899
1373
 
@@ -911,13 +1385,21 @@ class FeatureStore:
911
1385
 
912
1386
  # Select active features.
913
1387
  features_df = self.__get_features_df()
914
- features_df = features_df[((features_df.status != FeatureStatus.INACTIVE.name) & (features_df.group_name == group_name))]
1388
+ features_df = features_df[((features_df.status != FeatureStatus.INACTIVE.name) &
1389
+ (features_df.group_name == group_name) &
1390
+ (features_df.data_domain == self.__data_domain))]
915
1391
 
916
1392
  # Check if a feature with that group name exists or not. If not, raise error.
917
1393
  if features_df.shape[0] == 0:
918
- msg_code = MessageCodes.FUNC_EXECUTION_FAILED
919
- error_msg = Messages.get_message(
920
- msg_code, "get_group_features()", "No features found for group '{}'.".format(group_name))
1394
+ res = _FSUtils._get_data_domains(self.__repo, group_name, 'group_features')
1395
+ if res:
1396
+ msg_code = MessageCodes.EFS_OBJECT_IN_OTHER_DOMAIN
1397
+ error_msg = Messages.get_message(msg_code, "Features", "group name '{}'".format(group_name),
1398
+ self.__data_domain, res)
1399
+ else:
1400
+ msg_code = MessageCodes.EFS_OBJECT_NOT_EXIST
1401
+ error_msg = Messages.get_message(msg_code, "Features", "group name '{}'".format(group_name),
1402
+ self.__data_domain)
921
1403
  raise TeradataMlException(error_msg, msg_code)
922
1404
 
923
1405
  return Feature._from_df(features_df)
@@ -941,9 +1423,8 @@ class FeatureStore:
941
1423
 
942
1424
  EXAMPLES:
943
1425
  >>> from teradataml import DataFrame, FeatureStore, load_example_data
944
- # Load the sales data to Vantage.
945
- >>> load_example_data("dataframe", "sales")
946
1426
  # Create DataFrame on sales data.
1427
+ >>> load_example_data("dataframe", "sales")
947
1428
  >>> df = DataFrame("sales")
948
1429
  >>> df
949
1430
  Feb Jan Mar Apr datetime
@@ -953,12 +1434,18 @@ class FeatureStore:
953
1434
  Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
954
1435
  Alpha Co 210.0 200.0 215.0 250.0 04/01/2017
955
1436
  Yellow Inc 90.0 NaN NaN NaN 04/01/2017
956
- >>>
1437
+
1438
+ # Create FeatureStore for repo 'vfs_v1'.
1439
+ >>> fs = FeatureStore("vfs_v1")
1440
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
1441
+ # Setup FeatureStore for this repository.
1442
+ >>> fs.setup()
1443
+ True
1444
+
957
1445
  # Create FeatureGroup with name 'sales' from DataFrame.
958
1446
  >>> fg = FeatureGroup.from_DataFrame(
959
- ... name="sales", df=df, entity_columns="accounts", timestamp_col_name="datetime")
1447
+ ... name="sales", df=df, entity_columns="accounts", timestamp_column="datetime")
960
1448
  # Apply the FeatureGroup to FeatureStore.
961
- >>> fs = FeatureStore("vfs_v1")
962
1449
  >>> fs.apply(fg)
963
1450
  True
964
1451
 
@@ -975,20 +1462,28 @@ class FeatureStore:
975
1462
  _Validators._validate_function_arguments(argument_validation_params)
976
1463
 
977
1464
  df = self.list_feature_groups()
978
- df = df[df.name == name]
1465
+ df = df[(df['name'] == name) &
1466
+ (df['data_domain'] == self.__data_domain)]
979
1467
 
980
- # Check if a feature with that name exists or not. If not, raise error.
1468
+ # Check if a feature group with that name exists or not. If not, raise error.
981
1469
  if df.shape[0] == 0:
982
- msg_code = MessageCodes.FUNC_EXECUTION_FAILED
983
- error_msg = Messages.get_message(
984
- msg_code, "get_feature_group()", "FeatureGroup with name '{}' does not exist.".format(name))
1470
+ res = _FSUtils._get_data_domains(self.__repo, name, 'feature_group')
1471
+ if res:
1472
+ msg_code = MessageCodes.EFS_OBJECT_IN_OTHER_DOMAIN
1473
+ error_msg = Messages.get_message(msg_code, "FeatureGroup", "name '{}'".format(name),
1474
+ self.__data_domain, res)
1475
+ else:
1476
+ msg_code = MessageCodes.EFS_OBJECT_NOT_EXIST
1477
+ error_msg = Messages.get_message(msg_code, "FeatureGroup", "name '{}'".format(name),
1478
+ self.__data_domain)
985
1479
  raise TeradataMlException(error_msg, msg_code)
986
1480
 
987
1481
  return FeatureGroup._from_df(df,
988
1482
  self.__repo,
989
1483
  self.__get_features_df(),
990
1484
  self.__get_entity_df(),
991
- self.__get_data_source_df()
1485
+ self.__get_data_source_df(),
1486
+ data_domain=self.__data_domain
992
1487
  )
993
1488
 
994
1489
  def get_entity(self, name):
@@ -1010,9 +1505,8 @@ class FeatureStore:
1010
1505
 
1011
1506
  EXAMPLES:
1012
1507
  >>> from teradataml import DataFrame, Entity, FeatureStore, load_example_data
1013
- # Load the admissions data to Vantage.
1014
- >>> load_example_data("dataframe", "admissions_train")
1015
1508
  # Create DataFrame on admissions data.
1509
+ >>> load_example_data("dataframe", "admissions_train")
1016
1510
  >>> df = DataFrame("admissions_train")
1017
1511
  >>> df
1018
1512
  masters gpa stats programming admitted
@@ -1027,20 +1521,24 @@ class FeatureStore:
1027
1521
  26 yes 3.57 Advanced Advanced 1
1028
1522
  19 yes 1.98 Advanced Advanced 0
1029
1523
  13 no 4.00 Advanced Novice 1
1030
- >>>
1524
+
1525
+ # Create FeatureStore for repo 'vfs_v1'.
1526
+ >>> fs = FeatureStore("vfs_v1")
1527
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
1528
+ # Setup FeatureStore for this repository.
1529
+ >>> fs.setup()
1530
+ True
1531
+
1031
1532
  # Create Entity for column 'id' with name 'admissions_id'.
1032
1533
  >>> entity = Entity(name='admissions_id', description="Entity for admissions", columns=df.id)
1033
1534
  # Apply the Entity to FeatureStore 'vfs_v1'.
1034
- >>> fs = FeatureStore('vfs_v1')
1035
1535
  >>> fs.apply(entity)
1036
1536
  True
1037
- >>>
1038
1537
 
1039
1538
  # Get the Entity 'admissions_id' from repo 'vfs_v1'
1040
1539
  >>> entity = fs.get_entity('admissions_id')
1041
1540
  >>> entity
1042
1541
  Entity(name=admissions_id)
1043
- >>>
1044
1542
  """
1045
1543
  argument_validation_params = []
1046
1544
  argument_validation_params.append(["name", name, False, (str), True])
@@ -1049,14 +1547,22 @@ class FeatureStore:
1049
1547
  _Validators._validate_function_arguments(argument_validation_params)
1050
1548
 
1051
1549
  df = self.__get_entity_df()
1052
- df = df[df.name==name]
1550
+ df = df[(df['name'] == name) &
1551
+ (df['data_domain'] == self.__data_domain)]
1053
1552
 
1054
1553
  # Check if entity with that name exists or not. If not, raise error.
1055
1554
  if df.shape[0] == 0:
1056
- msg_code = MessageCodes.FUNC_EXECUTION_FAILED
1057
- error_msg = Messages.get_message(
1058
- msg_code, "get_entity()", "Entity with name '{}' does not exist.".format(name))
1555
+ res = _FSUtils._get_data_domains(self.__repo, name, 'entity')
1556
+ if res:
1557
+ msg_code = MessageCodes.EFS_OBJECT_IN_OTHER_DOMAIN
1558
+ error_msg = Messages.get_message(msg_code, "Entity", "name '{}'".format(name),
1559
+ self.__data_domain, res)
1560
+ else:
1561
+ msg_code = MessageCodes.EFS_OBJECT_NOT_EXIST
1562
+ error_msg = Messages.get_message(msg_code, "Entity", "name '{}'".format(name),
1563
+ self.__data_domain)
1059
1564
  raise TeradataMlException(error_msg, msg_code)
1565
+
1060
1566
  return Entity._from_df(df)
1061
1567
 
1062
1568
  def get_data_source(self, name):
@@ -1078,9 +1584,8 @@ class FeatureStore:
1078
1584
 
1079
1585
  EXAMPLES:
1080
1586
  >>> from teradataml import DataFrame, DataSource, FeatureStore, load_example_data
1081
- # Load the admissions data to Vantage.
1082
- >>> load_example_data("dataframe", "admissions_train")
1083
1587
  # Create DataFrame on admissions data.
1588
+ >>> load_example_data("dataframe", "admissions_train")
1084
1589
  >>> df = DataFrame("admissions_train")
1085
1590
  >>> df
1086
1591
  masters gpa stats programming admitted
@@ -1095,20 +1600,24 @@ class FeatureStore:
1095
1600
  26 yes 3.57 Advanced Advanced 1
1096
1601
  19 yes 1.98 Advanced Advanced 0
1097
1602
  13 no 4.00 Advanced Novice 1
1098
- >>>
1603
+
1604
+ # Create FeatureStore for repo 'vfs_v1'.
1605
+ >>> fs = FeatureStore("vfs_v1")
1606
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
1607
+ # Setup FeatureStore for this repository.
1608
+ >>> fs.setup()
1609
+ True
1610
+
1099
1611
  # Create DataSource using DataFrame 'df' with name 'admissions'.
1100
1612
  >>> ds = DataSource('admissions', source=df)
1101
1613
  # Apply the DataSource to FeatureStore 'vfs_v1'.
1102
- >>> fs = FeatureStore('vfs_v1')
1103
1614
  >>> fs.apply(ds)
1104
1615
  True
1105
- >>>
1106
1616
 
1107
1617
  # Get the DataSource 'admissions' from repo 'vfs_v1'
1108
1618
  >>> ds = fs.get_data_source('admissions')
1109
1619
  >>> ds
1110
1620
  DataSource(name=admissions)
1111
- >>>
1112
1621
  """
1113
1622
  argument_validation_params = []
1114
1623
  argument_validation_params.append(["name", name, False, (str), True])
@@ -1117,102 +1626,265 @@ class FeatureStore:
1117
1626
  _Validators._validate_function_arguments(argument_validation_params)
1118
1627
 
1119
1628
  df = self.__get_data_source_df()
1120
- df = df[df.name == name]
1629
+ df = df[(df['name'] == name) &
1630
+ (df['data_domain'] == self.__data_domain)]
1121
1631
 
1122
- # Check if a entity with that name exists or not. If not, raise error.
1632
+ # Check if a data source with that name exists or not. If not, raise error.
1123
1633
  if df.shape[0] == 0:
1124
- msg_code = MessageCodes.FUNC_EXECUTION_FAILED
1125
- error_msg = Messages.get_message(
1126
- msg_code, "get_data_source()", "DataSource with name '{}' does not exist.".format(name))
1634
+ res = _FSUtils._get_data_domains(self.__repo, name, 'data_source')
1635
+ if res:
1636
+ msg_code = MessageCodes.EFS_OBJECT_IN_OTHER_DOMAIN
1637
+ error_msg = Messages.get_message(msg_code, "DataSource", "name '{}'".format(name),
1638
+ self.__data_domain, res)
1639
+ else:
1640
+ msg_code = MessageCodes.EFS_OBJECT_NOT_EXIST
1641
+ error_msg = Messages.get_message(msg_code, "DataSource", "name '{}'".format(name),
1642
+ self.__data_domain)
1127
1643
  raise TeradataMlException(error_msg, msg_code)
1128
1644
 
1129
1645
  return DataSource._from_df(df)
1130
-
1131
- def set_features_inactive(self, names):
1646
+
1647
+ def get_feature_process(self, object, entity=None, features=None, description=None):
1132
1648
  """
1133
1649
  DESCRIPTION:
1134
- Mark the feature status as 'inactive'. Note that, inactive features are
1135
- not available for any further processing. Set the status as 'active' with
1136
- "set_features_active()" method.
1650
+ Retrieves the FeatureProcess object.
1137
1651
 
1138
1652
  PARAMETERS:
1139
- names:
1653
+ object:
1140
1654
  Required Argument.
1141
- Specifies the name(s) of the feature(s).
1142
- Types: str OR list of str
1655
+ Specifies the source to ingest feature values. It can be one of the following:
1656
+ * teradataml DataFrame
1657
+ * Feature group
1658
+ * Process id
1659
+ Notes:
1660
+ * If "object" is of type teradataml DataFrame, then "entity"
1661
+ and "features" should be provided.
1662
+ * If "object" is of type str, then it is considered
1663
+ the process id of an existing FeatureProcess, and the
1664
+ process is rerun. Entity and features are taken from the existing
1665
+ feature process. Hence, the arguments "entity" and "features"
1666
+ are ignored.
1667
+ * If "object" is of type FeatureGroup, then entity and features
1668
+ are taken from the FeatureGroup. Hence, the arguments "entity"
1669
+ and "features" are ignored.
1670
+ Types: DataFrame or FeatureGroup or str
1671
+
1672
+ entity:
1673
+ Optional Argument.
1674
+ Specifies the Entity for the DataFrame.
1675
+ Notes:
1676
+ * Ignored when "object" is of type FeatureGroup or str.
1677
+ * If a string or list of strings is provided, then "object" should
1678
+ have these columns in it.
1679
+ * If Entity object is provided, then associated columns in Entity
1680
+ object should be present in DataFrame.
1681
+ Types: Entity or str or list of str
1682
+
1683
+ features:
1684
+ Optional Argument.
1685
+ Specifies the list of features to be considered in the feature process.
1686
+ Feature ingestion takes place only for these features.
1687
+ Note:
1688
+ * Ignored when "object" is of type FeatureGroup or str.
1689
+ Types: Feature or list of Feature or str or list of str.
1690
+
1691
+ description:
1692
+ Optional Argument.
1693
+ Specifies description for the FeatureProcess.
1694
+ Types: str
1143
1695
 
1144
1696
  RETURNS:
1145
- bool
1697
+ FeatureProcess
1146
1698
 
1147
1699
  RAISES:
1148
- teradataMLException
1700
+ None.
1149
1701
 
1150
1702
  EXAMPLES:
1151
- >>> from teradataml import DataFrame, DataSource, FeatureStore, load_example_data
1152
- # Load the admissions data to Vantage.
1153
- >>> load_example_data("dataframe", "admissions_train")
1154
- # Create DataFrame on admissions data.
1155
- >>> df = DataFrame("admissions_train")
1156
- >>> df
1157
- masters gpa stats programming admitted
1158
- id
1159
- 34 yes 3.85 Advanced Beginner 0
1160
- 32 yes 3.46 Advanced Beginner 0
1161
- 11 no 3.13 Advanced Advanced 1
1162
- 40 yes 3.95 Novice Beginner 0
1163
- 38 yes 2.65 Advanced Beginner 1
1164
- 36 no 3.00 Advanced Novice 0
1165
- 7 yes 2.33 Novice Novice 1
1166
- 26 yes 3.57 Advanced Advanced 1
1167
- 19 yes 1.98 Advanced Advanced 0
1168
- 13 no 4.00 Advanced Novice 1
1169
- >>>
1170
- # Create FeatureGroup from DataFrame df.
1171
- >>> fg = FeatureGroup.from_DataFrame(name='admissions', df=df, entity_columns='id')
1172
- # Apply the FeatureGroup to FeatureStore 'vfs_v1'.
1703
+ >>> from teradataml import FeatureStore
1173
1704
  >>> fs = FeatureStore('vfs_v1')
1174
- >>> fs.apply(fg)
1705
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
1706
+ # Setup FeatureStore for this repository.
1707
+ >>> fs.setup()
1175
1708
  True
1176
- # Get FeatureGroup 'admissions' from FeatureStore.
1177
- >>> fg = fs.get_feature_group('admissions')
1178
- >>> fg
1179
- FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=programming), Feature(name=admitted), Feature(name=stats), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
1180
1709
 
1181
- # Set the Feature 'programming' inactive.
1182
- >>> fs.set_features_inactive('programming')
1183
- True
1184
- # Get FeatureGroup again after setting feature inactive.
1185
- >>> fg = fs.get_feature_group('admissions')
1186
- >>> fg
1187
- FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=stats), Feature(name=admitted), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
1188
- >>>
1710
+ # Load the admissions data to Vantage.
1711
+ >>> from teradataml import DataFrame, load_example_data
1712
+ >>> load_example_data("dataframe", "admissions_train")
1713
+ >>> admission_df = DataFrame("admissions_train")
1714
+
1715
+ >>> fp = FeatureProcess(repo='vfs_v1',
1716
+ ... data_domain='d1',
1717
+ ... object=admission_df,
1718
+ ... entity='id',
1719
+ ... features=['stats', 'programming', 'admitted'])
1720
+ >>> fp.run()
1721
+ Process '0d365f08-66b0-11f0-88ff-b0dcef8381ea' started.
1722
+ Process '0d365f08-66b0-11f0-88ff-b0dcef8381ea' completed.
1723
+
1724
+ >>> fs.get_feature_process(object='0d365f08-66b0-11f0-88ff-b0dcef8381ea')
1725
+ FeatureProcess(repo=vfs_v1, data_domain=d1, process_id=0d365f08-66b0-11f0-88ff-b0dcef8381ea)
1189
1726
  """
1190
- return self.__set_active_inactive_features(names, active=False)
1191
-
1192
- def set_features_active(self, names):
1727
+ return FeatureProcess(repo=self.__repo,
1728
+ data_domain=self.__data_domain,
1729
+ object=object,
1730
+ entity=entity,
1731
+ features=features,
1732
+ description=description
1733
+ )
1734
+
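+ # A minimal sketch of the FeatureGroup variant noted above (assumes a
+ # FeatureGroup 'fg' was applied earlier, as in the apply() examples):
+ # entity and features are taken from the group itself.
+ #     fp = fs.get_feature_process(object=fg)
+ #     fp.run()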
1735
+ def get_feature_catalog(self):
1193
1736
  """
1194
1737
  DESCRIPTION:
1195
- Mark the feature status as active. Set the status as 'inactive' with
1196
- "set_features_inactive()" method. Note that, inactive features are
1197
- not available for any further processing.
1738
+ Retrieves FeatureCatalog based on the feature store's repo and data domain.
1198
1739
 
1199
1740
  PARAMETERS:
1200
- names:
1201
- Required Argument.
1202
- Specifies the name(s) of the feature(s).
1203
- Types: str OR list of str
1741
+ None.
1204
1742
 
1205
1743
  RETURNS:
1206
- bool
1744
+ FeatureCatalog
1207
1745
 
1208
1746
  RAISES:
1209
- teradataMLException
1747
+ None.
1210
1748
 
1211
1749
  EXAMPLES:
1212
- >>> from teradataml import DataFrame, DataSource, FeatureStore, load_example_data
1213
- # Load the admissions data to Vantage.
1214
- >>> load_example_data("dataframe", "admissions_train")
1215
- # Create DataFrame on admissions data.
1750
+ >>> from teradataml import FeatureStore
1751
+ # Create FeatureStore for repo 'vfs_v1'.
1752
+ >>> fs = FeatureStore('vfs_v1')
1753
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
1754
+ # Setup FeatureStore for this repository.
1755
+ >>> fs.setup()
1756
+ True
1757
+
1758
+ # Load the sales data to Vantage.
1759
+ >>> from teradataml import load_example_data
1760
+ >>> load_example_data("dataframe", "sales")
1761
+ >>> df = DataFrame("sales")
1762
+
1763
+ # Create a feature process.
1764
+ >>> from teradataml import FeatureProcess
1765
+ >>> fp = FeatureProcess(repo="vfs_v1",
1766
+ ... data_domain='sales',
1767
+ ... object=df,
1768
+ ... entity="accounts",
1769
+ ... features=["Jan", "Feb", "Mar", "Apr"])
1770
+ >>> fp.run()
1771
+ Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' started.
1772
+ Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' completed.
1773
+
1774
+ # Get FeatureCatalog from FeatureStore.
1775
+ >>> fs.get_feature_catalog()
1776
+ FeatureCatalog(repo=vfs_v1, data_domain=sales)
1777
+ """
1778
+ return FeatureCatalog(repo=self.__repo,
1779
+ data_domain=self.__data_domain)
1780
+
1781
+ def get_data_domain(self):
1782
+ """
1783
+ DESCRIPTION:
1784
+ Retrieves DataDomain based on the feature store's repo and data domain.
1785
+
1786
+ PARAMETERS:
1787
+ None
1788
+
1789
+ RETURNS:
1790
+ DataDomain
1791
+
1792
+ RAISES:
1793
+ None.
1794
+
1795
+ EXAMPLES:
1796
+ >>> from teradataml import FeatureStore
1797
+ # Create FeatureStore for repo 'vfs_v1'.
1798
+ >>> fs = FeatureStore('vfs_v1', data_domain='sales')
1799
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
1800
+ # Setup FeatureStore for this repository.
1801
+ >>> fs.setup()
1802
+
1803
+ # Get DataDomain from FeatureStore.
1804
+ >>> fs.get_data_domain()
1805
+ DataDomain(repo=vfs_v1, data_domain=sales)
1806
+ """
1807
+ return DataDomain(repo=self.__repo,
1808
+ data_domain=self.__data_domain)
1809
+
1810
+ def get_dataset_catalog(self):
1811
+ """
1812
+ DESCRIPTION:
1813
+ Retrieves DatasetCatalog based on the feature store's repo and data domain.
1814
+
1815
+ PARAMETERS:
1816
+ None.
1817
+
1818
+ RETURNS:
1819
+ DatasetCatalog
1820
+
1821
+ RAISES:
1822
+ None.
1823
+
1824
+ EXAMPLES:
1825
+ >>> from teradataml import FeatureStore
1826
+ # Create FeatureStore for repo 'vfs_v1'.
1827
+ >>> fs = FeatureStore('vfs_v1', data_domain='sales')
1828
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
1829
+ # Setup FeatureStore for this repository.
1830
+ >>> fs.setup()
1831
+
1832
+ # Load the sales data to Vantage.
1833
+ >>> from teradataml import load_example_data
1834
+ >>> load_example_data("dataframe", "sales")
1835
+ >>> df = DataFrame("sales")
1836
+
1837
+ # Create a feature process.
1838
+ >>> from teradataml import FeatureProcess
1839
+ >>> fp = FeatureProcess(repo="vfs_v1",
1840
+ ... data_domain='sales',
1841
+ ... object=df,
1842
+ ... entity="accounts",
1843
+ ... features=["Jan", "Feb", "Mar", "Apr"])
1844
+ >>> fp.run()
1845
+ Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' started.
1846
+ Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' completed.
1847
+ True
1848
+
1849
+ # Build the dataset.
1850
+ >>> dc = DatasetCatalog(repo='vfs_v1', data_domain='sales')
1851
+ >>> dataset = dc.build_dataset(entity='accounts',
1852
+ ... selected_features = {
1853
+ ... 'Jan': fp.process_id,
1854
+ ... 'Feb': fp.process_id},
1855
+ ... view_name='ds_jan_feb',
1856
+ ... description='Dataset with Jan and Feb features')
1857
+
1858
+ # Get DatasetCatalog from FeatureStore.
1859
+ >>> fs.get_dataset_catalog()
1860
+ DatasetCatalog(repo=vfs_v1, data_domain=sales)
1861
+ """
1862
+ return DatasetCatalog(repo=self.__repo,
1863
+ data_domain=self.__data_domain)
1864
+
1865
+ def set_features_inactive(self, names):
1866
+ """
1867
+ DESCRIPTION:
1868
+ Mark the feature status as 'inactive'. Note that inactive features are
1869
+ not available for any further processing. Set the status back to 'active' with the
1870
+ "set_features_active()" method.
1871
+
1872
+ PARAMETERS:
1873
+ names:
1874
+ Required Argument.
1875
+ Specifies the name(s) of the feature(s).
1876
+ Types: str OR list of str
1877
+
1878
+ RETURNS:
1879
+ bool
1880
+
1881
+ RAISES:
1882
+ TeradataMlException
1883
+
1884
+ EXAMPLES:
1885
+ >>> from teradataml import DataFrame, DataSource, FeatureStore, load_example_data
1886
+ # Create DataFrame on admissions data.
1887
+ >>> load_example_data("dataframe", "admissions_train")
1216
1888
  >>> df = DataFrame("admissions_train")
1217
1889
  >>> df
1218
1890
  masters gpa stats programming admitted
@@ -1227,25 +1899,103 @@ class FeatureStore:
1227
1899
  26 yes 3.57 Advanced Advanced 1
1228
1900
  19 yes 1.98 Advanced Advanced 0
1229
1901
  13 no 4.00 Advanced Novice 1
1230
- >>>
1902
+
1903
+ # Create FeatureStore for repo 'vfs_v1'.
1904
+ >>> fs = FeatureStore("vfs_v1")
1905
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
1906
+ # Setup FeatureStore for this repository.
1907
+ >>> fs.setup()
1908
+ True
1909
+
1231
1910
  # Create FeatureGroup from DataFrame df.
1232
1911
  >>> fg = FeatureGroup.from_DataFrame(name='admissions', df=df, entity_columns='id')
1233
1912
  # Apply the FeatureGroup to FeatureStore 'vfs_v1'.
1234
- >>> fs = FeatureStore('vfs_v1')
1235
1913
  >>> fs.apply(fg)
1236
1914
  True
1915
+
1237
1916
  # Get FeatureGroup 'admissions' from FeatureStore.
1238
1917
  >>> fg = fs.get_feature_group('admissions')
1239
1918
  >>> fg
1240
1919
  FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=programming), Feature(name=admitted), Feature(name=stats), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
1920
+
1921
+ # Example 1: Set a feature inactive and verify the FeatureGroup afterwards.
1241
1922
  # Set the Feature 'programming' inactive.
1242
1923
  >>> fs.set_features_inactive('programming')
1243
1924
  True
1925
+
1926
+ # Get FeatureGroup again after setting feature inactive.
1927
+ >>> fg = fs.get_feature_group('admissions')
1928
+ >>> fg
1929
+ FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=stats), Feature(name=admitted), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
1930
+
1931
+ """
1932
+ return self.__set_active_inactive_features(names, active=False)
1933
+
1934
+ def set_features_active(self, names):
1935
+ """
1936
+ DESCRIPTION:
1937
+ Mark the feature status as active. Set the status as 'inactive' with
1938
+ "set_features_inactive()" method. Note that, inactive features are
1939
+ not available for any further processing.
1940
+
1941
+ PARAMETERS:
1942
+ names:
1943
+ Required Argument.
1944
+ Specifies the name(s) of the feature(s).
1945
+ Types: str OR list of str
1946
+
1947
+ RETURNS:
1948
+ bool
1949
+
1950
+ RAISES:
1951
+ teradataMLException
1952
+
1953
+ EXAMPLES:
1954
+ >>> from teradataml import DataFrame, DataSource, FeatureStore, load_example_data
1955
+ # Create DataFrame on admissions data.
1956
+ >>> load_example_data("dataframe", "admissions_train")
1957
+ >>> df = DataFrame("admissions_train")
1958
+ >>> df
1959
+ masters gpa stats programming admitted
1960
+ id
1961
+ 34 yes 3.85 Advanced Beginner 0
1962
+ 32 yes 3.46 Advanced Beginner 0
1963
+ 11 no 3.13 Advanced Advanced 1
1964
+ 40 yes 3.95 Novice Beginner 0
1965
+ 38 yes 2.65 Advanced Beginner 1
1966
+ 36 no 3.00 Advanced Novice 0
1967
+ 7 yes 2.33 Novice Novice 1
1968
+ 26 yes 3.57 Advanced Advanced 1
1969
+ 19 yes 1.98 Advanced Advanced 0
1970
+ 13 no 4.00 Advanced Novice 1
1971
+
1972
+ # Create FeatureStore for repo 'vfs_v1'.
1973
+ >>> fs = FeatureStore("vfs_v1")
1974
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
1975
+ # Setup FeatureStore for this repository.
1976
+ >>> fs.setup()
1977
+ True
1978
+
1979
+ # Create FeatureGroup from DataFrame df.
1980
+ >>> fg = FeatureGroup.from_DataFrame(name='admissions', df=df, entity_columns='id')
1981
+ # Apply the FeatureGroup to FeatureStore 'vfs_v1'.
1982
+ >>> fs.apply(fg)
1983
+ True
1984
+
1985
+ # Get FeatureGroup 'admissions' from FeatureStore.
1986
+ >>> fg = fs.get_feature_group('admissions')
1987
+ >>> fg
1988
+ FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=programming), Feature(name=admitted), Feature(name=stats), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
1989
+
1990
+ # Example 1: Mark the Feature 'programming' 'inactive' and then back to 'active'.
1991
+ # First, set the Feature 'programming' inactive.
1992
+ >>> fs.set_features_inactive('programming')
1993
+ True
1994
+
1244
1995
  # Get FeatureGroup again after setting feature inactive.
1245
1996
  >>> fg = fs.get_feature_group('admissions')
1246
1997
  >>> fg
1247
1998
  FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=stats), Feature(name=admitted), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
1248
- >>>
1249
1999
 
1250
2000
  # Mark Feature 'programming' from 'inactive' to 'active'.
1251
2001
  >>> fs.set_features_active('programming')
@@ -1293,17 +2043,53 @@ class FeatureStore:
1293
2043
 
1294
2044
  status = FeatureStatus.ACTIVE.name if active else FeatureStatus.INACTIVE.name
1295
2045
 
1296
- _update_data(table_name=EFS_FEATURES_SPEC["table_name"],
2046
+ is_set = True
2047
+ if status == FeatureStatus.INACTIVE.name:
2048
+ # Get the joined df of '_efs_features' and '_efs_features_metadata'.
2049
+ feature_info_df = self.__get_feature_info_df()
2050
+ metadata_features = [feature.name for feature in feature_info_df.itertuples()]
2051
+
2052
+ # Split the user-provided feature names into those that are
2053
+ # present in the catalog and those that are not.
2054
+ catalog_features = []
2055
+ non_catalog_features = []
2056
+ for name in names:
2057
+ if name in metadata_features:
2058
+ catalog_features.append(name)
2059
+ else:
2060
+ non_catalog_features.append(name)
2061
+
2062
+ # If all user-provided names are present in the catalog.
2063
+ if len(catalog_features) == len(names):
2064
+ print("Feature(s) '{}' entries exist in the feature catalog, cannot be set "
2065
+ "to inactive.".format(", ".join(catalog_features)))
2066
+ return False
2067
+ # If some of the user-provided features are present in the catalog.
2068
+ elif len(catalog_features) > 0:
2069
+ print("Feature(s) '{}' entries exist in the feature catalog, cannot be set "
2070
+ "to inactive.".format(", ".join(catalog_features)))
2071
+ is_set = False
2072
+
2073
+ # Keep only the feature names that are not present in the catalog.
2074
+ names = non_catalog_features
2075
+
2076
+ _update_data(table_name=self.__table_names['feature'],
1297
2077
  schema_name=self.__repo,
1298
2078
  update_columns_values={"status": status},
1299
2079
  update_conditions={"name": names}
1300
2080
  )
1301
- return True
2081
+
2082
+ return is_set
1302
2083
 
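+ # A minimal sketch of the partial-success behavior implemented above,
+ # with hypothetical names: 'Jan' already has feature catalog entries,
+ # 'scratch_feature' does not.
+ #     fs.set_features_inactive(['Jan', 'scratch_feature'])
+ #     # prints the catalog warning for 'Jan', still deactivates
+ #     # 'scratch_feature', and returns False to flag the skipped name.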
1303
2084
  def apply(self, object):
1304
2085
  """
1305
2086
  DESCRIPTION:
1306
2087
  Register objects to repository.
2088
+ Note:
2089
+ * If the object is an Entity or FeatureGroup and the same entity or feature group is already
2090
+ registered in the repository, it is not updated.
2091
+ * If the entity or feature group is associated with any feature process, an error is raised
2092
+ while modifying these objects.
1307
2093
 
1308
2094
  PARAMETERS:
1309
2095
  object:
@@ -1318,16 +2104,24 @@ class FeatureStore:
1318
2104
  TeradataMLException
1319
2105
 
1320
2106
  EXAMPLES:
2107
+ >>> from teradataml import FeatureStore, DataFrame, load_example_data
2108
+ # Create DataFrame on sales data.
1321
2109
  >>> load_example_data('dataframe', ['sales'])
1322
2110
  >>> df = DataFrame("sales")
1323
2111
 
2112
+ # Create FeatureStore for repo 'vfs_v1'.
2113
+ >>> fs = FeatureStore("vfs_v1")
2114
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
2115
+ # Setup FeatureStore for this repository.
2116
+ >>> fs.setup()
2117
+ True
2118
+
1324
2119
  # Example 1: create a Feature for column 'Feb' from 'sales' DataFrame
1325
2120
  # and register with repo 'vfs_v1'.
1326
2121
  >>> # Create Feature.
1327
2122
  >>> from teradataml import Feature
1328
2123
  >>> feature = Feature('sales:Feb', df.Feb)
1329
2124
  >>> # Register the above Feature with repo.
1330
- >>> fs = FeatureStore('vfs_v1')
1331
2125
  >>> fs.apply(feature)
1332
2126
  True
1333
2127
  >>>
@@ -1338,7 +2132,6 @@ class FeatureStore:
1338
2132
  >>> from teradataml import Entity
1339
2133
  >>> entity = Entity('sales:accounts', df.accounts)
1340
2134
  >>> # Register the above Entity with repo.
1341
- >>> fs = FeatureStore('vfs_v1')
1342
2135
  >>> fs.apply(entity)
1343
2136
  True
1344
2137
  >>>
@@ -1349,7 +2142,6 @@ class FeatureStore:
1349
2142
  >>> from teradataml import DataSource
1350
2143
  >>> ds = DataSource('Sales_Data', df)
1351
2144
  >>> # Register the above DataSource with repo.
1352
- >>> fs = FeatureStore('vfs_v1')
1353
2145
  >>> fs.apply(ds)
1354
2146
  True
1355
2147
  >>>
@@ -1364,29 +2156,73 @@ class FeatureStore:
1364
2156
  ... entity=entity,
1365
2157
  ... data_source=data_source)
1366
2158
  >>> # Register the above FeatureStore with repo.
1367
- >>> fs = FeatureStore('vfs_v1')
1368
2159
  >>> fs.apply(fg)
1369
2160
  True
1370
- >>>
1371
2161
  """
1372
2162
  argument_validation_params = []
1373
2163
  argument_validation_params.append(["name", object, False, (Feature, Entity, DataSource, FeatureGroup)])
1374
2164
 
1375
2165
  # Validate argument types
1376
2166
  _Validators._validate_function_arguments(argument_validation_params)
1377
- return object.publish(self.__repo)
2167
+ return object.publish(self.__repo, self.__data_domain)
1378
2168
 
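+ # A minimal sketch of the registration semantics in the Note above
+ # (outcomes paraphrased from the Note, not captured output):
+ #     fs.apply(entity)   # first registration -> True
+ #     fs.apply(entity)   # same definition again: accepted, not updated
+ #     # If 'entity' already backs a feature process, re-applying a
+ #     # modified definition raises an error instead.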
1379
- def get_dataset(self, group_name):
2169
+ def get_data(self, process_id=None, entity=None, features=None,
2170
+ dataset_name=None, as_of=None, include_historic_records=False):
1380
2171
  """
1381
2172
  DESCRIPTION:
1382
- Returns teradataml DataFrame based on "group_name".
2173
+ Returns teradataml DataFrame which has entities and feature values.
2174
+ Method generates dataset from following -
2175
+ * process_id
2176
+ * entity and features
2177
+ * dataset_name
1383
2178
 
1384
2179
  PARAMETERS:
1385
- group_name:
1386
- Required Argument.
1387
- Specifies the name of the feature group.
2180
+ process_id:
2181
+ Optional Argument.
2182
+ Either "process_id", "entity" and "features", or "dataset_name" must be specified.
2183
+ Specifies the process id of an existing feature process.
2184
+ Types: str
2185
+
2186
+ entity:
2187
+ Optional Argument.
2188
+ Specifies the name of the Entity, or an Entity object,
2189
+ to be considered in the dataset.
2190
+ Types: str or Entity.
2191
+
2192
+ features:
2193
+ Optional Argument.
2194
+ Specifies the names of Features and the corresponding feature version
2195
+ to be included in the dataset.
2196
+ Notes:
2197
+ * Key is the name of the feature and value is the version of the
2198
+ feature.
2199
+ * Look at FeatureCatalog.list_feature_versions() to get the list of
2200
+ features and their versions.
2201
+ Types: dict
2202
+
2203
+ dataset_name:
2204
+ Optional Argument.
2205
+ Specifies the dataset name.
1388
2206
  Types: str
1389
2207
 
2208
+ as_of:
2209
+ Optional Argument.
2210
+ Specifies the time to retrieve the Feature Values instead of
2211
+ retrieving the latest values.
2212
+ Notes:
2213
+ * Applicable only when "process_id" is passed to the function.
2214
+ * Ignored when "dataset_name" is passed.
2215
+ Types: str or datetime.datetime
2216
+
2217
+ include_historic_records:
2218
+ Optional Argument.
2219
+ Specifies whether to include historic data in the dataset.
2220
+ Note:
2221
+ * If "as_of" is specified, then the "include_historic_records" argument is ignored.
2222
+ Default Value: False.
2223
+ Types: bool.
2224
+
2225
+
1390
2226
  RETURNS:
1391
2227
  teradataml DataFrame.
1392
2228
 
@@ -1395,11 +2231,9 @@ class FeatureStore:
1395
2231
 
1396
2232
  EXAMPLES:
1397
2233
  >>> from teradataml import DataFrame, FeatureStore, load_example_data
1398
- # Load the sales data to Vantage.
1399
- >>> load_example_data("dataframe", "sales")
1400
2234
  # Create DataFrame on sales data.
2235
+ >>> load_example_data("dataframe", "sales")
1401
2236
  >>> df = DataFrame("sales")
1402
- >>> df
1403
2237
  >>> df
1404
2238
  Feb Jan Mar Apr datetime
1405
2239
  accounts
@@ -1408,99 +2242,415 @@ class FeatureStore:
1408
2242
  Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
1409
2243
  Alpha Co 210.0 200.0 215.0 250.0 04/01/2017
1410
2244
  Yellow Inc 90.0 NaN NaN NaN 04/01/2017
1411
- >>>
1412
- # Create FeatureGroup with name 'sales' from DataFrame.
1413
- >>> fg = FeatureGroup.from_DataFrame(
1414
- ... name="sales", df=df, entity_columns="accounts", timestamp_col_name="datetime")
1415
- # Apply the FeatureGroup to FeatureStore.
1416
- >>> fs = FeatureStore("vfs_v1")
1417
- >>> fs.apply(fg)
2245
+
2246
+ >>> repo = 'vfs_v1'
2247
+ >>> data_domain = 'sales'
2248
+ >>> fs = FeatureStore(repo=repo, data_domain=data_domain)
2249
+ FeatureStore is ready to use.
2250
+
2251
+ # Example 1: Get the data from process_id.
2252
+ >>> fp = FeatureProcess(repo=repo,
2253
+ ... data_domain=data_domain,
2254
+ ... object=df,
2255
+ ... entity='accounts',
2256
+ ... features=['Jan', 'Feb'])
2257
+ >>> fp.run()
2258
+ Process '1e9e8d64-6851-11f0-99c5-a30631e77953' started.
2259
+ Process '1e9e8d64-6851-11f0-99c5-a30631e77953' completed.
1418
2260
  True
1419
2261
 
1420
- # Get the DataSet for FeatureGroup 'sales'
1421
- >>> df = fs.get_dataset('sales')
1422
- >>> df
1423
- datetime Jan Feb Apr Mar
2262
+ >>> fs.get_data(process_id=fp.process_id)
2263
+ accounts Feb Jan
2264
+ 0 Alpha Co 210.0 200.0
2265
+ 1 Blue Inc 90.0 50.0
2266
+ 2 Jones LLC 200.0 150.0
2267
+ 3 Orange Inc 210.0 NaN
2268
+ 4 Yellow Inc 90.0 NaN
2269
+ 5 Red Inc 200.0 150.0
2270
+
2271
+ # Example 2: Get the data from entity and features.
2272
+ >>> fs.get_data(entity='accounts', features={'Jan': fp.process_id})
2273
+ accounts Jan
2274
+ 0 Alpha Co 200.0
2275
+ 1 Blue Inc 50.0
2276
+ 2 Jones LLC 150.0
2277
+ 3 Orange Inc NaN
2278
+ 4 Yellow Inc NaN
2279
+ 5 Red Inc 150.0
2280
+
2281
+ # Example 3: Get the data from dataset name.
2282
+ >>> dc = DatasetCatalog(repo=repo, data_domain=data_domain)
2283
+ >>> dc.build_dataset(entity='accounts',
2284
+ ... selected_features={'Jan': fp.process_id,
2285
+ ... 'Feb': fp.process_id},
2286
+ ... view_name='test_get_data',
2287
+ ... description='Dataset with Jan and Feb')
2288
+ >>> fs.get_data(dataset_name='test_get_data')
2289
+ accounts Feb Jan
2290
+ 0 Alpha Co 210.0 200.0
2291
+ 1 Blue Inc 90.0 50.0
2292
+ 2 Jones LLC 200.0 150.0
2293
+ 3 Orange Inc 210.0 NaN
2294
+ 4 Yellow Inc 90.0 NaN
2295
+ 5 Red Inc 200.0 150.0
2296
+
2297
+
2298
+ # Example 4: Get the data from Entity and Features, where entity
2299
+ # object and feature objects passed to the entity and
2300
+ # features arguments.
2301
+ >>> # Create features.
2302
+ >>> feature1 = Feature('sales:Mar',
2303
+ ... df.Mar,
2304
+ ... feature_type=FeatureType.CATEGORICAL)
2305
+
2306
+ >>> feature2 = Feature('sales:Apr',
2307
+ ... df.Apr,
2308
+ ... feature_type=FeatureType.CONTINUOUS)
2309
+
2310
+ >>> # Create entity.
2311
+ >>> entity = Entity(name='accounts_entity', columns=['accounts'])
2312
+
2313
+ >>> fp1 = FeatureProcess(repo=repo,
2314
+ ... data_domain=data_domain,
2315
+ ... object=df,
2316
+ ... entity=entity,
2317
+ ... features=[feature1, feature2])
2318
+ >>> fp1.run()
2319
+ Process '5522c034-684d-11f0-99c5-a30631e77953' started.
2320
+ Process '5522c034-684d-11f0-99c5-a30631e77953' completed.
2321
+ True
2322
+
2323
+ >>> fs.get_data(entity=entity, features={feature1.name: fp1.process_id,
2324
+ ... feature2.name: fp1.process_id})
2325
+ accounts sales:Mar sales:Apr
2326
+ 0 Alpha Co 215.0 250.0
2327
+ 1 Blue Inc 95.0 101.0
2328
+ 2 Jones LLC 140.0 180.0
2329
+ 3 Orange Inc NaN 250.0
2330
+ 4 Yellow Inc NaN NaN
2331
+ 5 Red Inc 140.0 NaN
2332
+
2333
+ # Example 5: Get the data for the time passed by the user via the as_of argument.
2334
+ >>> import time
2335
+ >>> from datetime import datetime as dt, date as d
2336
+
2337
+ # Retrieve the record where accounts == 'Blue Inc'.
2338
+ >>> df_test = df[df['accounts'] == 'Blue Inc']
2339
+ >>> df_test
2340
+ Feb Jan Mar Apr datetime
1424
2341
  accounts
1425
- Orange Inc 04/01/2017 NaN 210.0 250.0 NaN
1426
- Jones LLC 04/01/2017 150.0 200.0 180.0 140.0
1427
- Blue Inc 04/01/2017 50.0 90.0 101.0 95.0
1428
- Alpha Co 04/01/2017 200.0 210.0 250.0 215.0
1429
- Yellow Inc 04/01/2017 NaN 90.0 NaN NaN
1430
- >>>
1431
- """
1432
- # Get the FeatureGroup first and extract all details.
1433
- feature_group = self.get_feature_group(group_name)
1434
- columns = [feature.column_name for feature in feature_group.features
1435
- if feature.status != FeatureStatus.INACTIVE]
1436
- entity_columns = feature_group.entity.columns
1437
- source = feature_group.data_source.source
2342
+ Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
2343
+
2344
+ # Example updates the data. Hence, creating a new table to avoid modifying the existing tables data.
2345
+ >>> df_test.to_sql('sales_test', if_exists='replace')
2346
+ >>> test_df = DataFrame('sales_test')
2347
+ >>> test_df
2348
+ accounts Feb Jan Mar Apr datetime
2349
+ 0 Blue Inc 90.0 50 95 101 17/01/04
2350
+
2351
+ >>> # Create a feature process.
2352
+ >>> fp = FeatureProcess(repo=repo,
2353
+ ... data_domain=data_domain,
2354
+ ... object=test_df,
2355
+ ... entity='accounts',
2356
+ ... features=['Jan', 'Feb'])
2357
+
2358
+ >>> # Run the feature process
2359
+ >>> fp.run()
2360
+ Process '6cb49b4b-79d4-11f0-8c5e-b0dcef8381ea' started.
2361
+ Process '6cb49b4b-79d4-11f0-8c5e-b0dcef8381ea' completed.
2362
+ True
1438
2363
 
1439
- # Create DF from the source.
1440
- df = DataFrame.from_query(source)
2364
+ >>> # Running the same process more than once to demonstrate how user can
2365
+ >>> # retrieve specific version of Features using argument 'as_of'.
2366
+ >>> # Wait for 20 seconds. Then update the data. Then run again.
2367
+ >>> time.sleep(20)
2368
+ >>> execute_sql("update sales_test set Jan = Jan * 10, Feb = Feb * 10")
2369
+ TeradataCursor uRowsHandle=269 bClosed=False
2370
+
2371
+ >>> # Run the feature process again.
2372
+ >>> fp.run()
2373
+ Process '6cb49b4b-79d4-11f0-8c5e-b0dcef8381ea' started.
2374
+ Process '6cb49b4b-79d4-11f0-8c5e-b0dcef8381ea' completed.
2375
+ True
1441
2376
 
1442
- # Select the corresponding columns.
1443
- required_columns = entity_columns + columns
1444
- if feature_group.data_source.timestamp_col_name:
1445
- columns = [col for col in columns if col != feature_group.data_source.timestamp_col_name]
1446
- required_columns = entity_columns + [feature_group.data_source.timestamp_col_name] + columns
1447
- return df.select(required_columns)
2377
+ >>> # Then again wait for 20 seconds. Then update the data. Then run again.
2378
+ >>> time.sleep(20)
2379
+ >>> execute_sql("update sales_test set Jan = Jan * 10, Feb = Feb * 10")
2380
+ TeradataCursor uRowsHandle=397 bClosed=False
1448
2381
 
1449
- def __get_feature_group_names(self, name, type_):
2382
+ >>> # Run the feature process again.
2383
+ >>> fp.run()
2384
+ Process '6cb49b4b-79d4-11f0-8c5e-b0dcef8381ea' started.
2385
+ Process '6cb49b4b-79d4-11f0-8c5e-b0dcef8381ea' completed.
2386
+ True
2387
+
2388
+ # Retrieve specific version of Features at '2025-08-15 12:37:23'
2389
+ >>> as_of_time = dt(2025, 8, 15, 12, 37, 23)
2390
+
2391
+ >>> # time passed to as_of in datetime.datetime format.
2392
+ >>> fs.get_data(process_id=fp.process_id,
2393
+ ... as_of=as_of_time)
2394
+ accounts Feb Jan
2395
+ 0 Blue Inc 900.0 500
2396
+
2397
+ >>> # time passed to as_of in string format.
2398
+ >>> fs.get_data(process_id=fp.process_id,
2399
+ ... as_of=as_of_time.strftime('%Y-%m-%d %H:%M:%S'))
2400
+ accounts Feb Jan
2401
+ 0 Blue Inc 900.0 500
2402
+
2403
+ # Example 6: Get the data for the time passed by the user via the as_of argument
2404
+ # by sourcing entity and features.
2405
+ >>> # time passed to as_of in datetime.datetime format.
2406
+ >>> fs.get_data(entity='accounts',
2407
+ ... features={'Feb': fp.process_id,
2408
+ ... 'Jan': fp.process_id},
2409
+ ... as_of=as_of_time)
2410
+ accounts Feb Jan
2411
+ 0 Blue Inc 900.0 500
2412
+
2413
+ >>> # time passed to as_of in string format.
2414
+ >>> fs.get_data(entity='accounts',
2415
+ ... features={'Feb': fp.process_id,
2416
+ ... 'Jan': fp.process_id},
2417
+ ... as_of=as_of_time.strftime('%Y-%m-%d %H:%M:%S'))
2418
+ accounts Feb Jan
2419
+ 0 Blue Inc 900.0 500
2420
+
2421
+ # Example 7: Get the latest data for the given process_id.
2422
+ >>> fs.get_data(process_id=fp.process_id, include_historic_records=False)
2423
+ accounts Feb Jan
2424
+ 0 Blue Inc 9000.0 5000
2425
+
2426
+ # Example 8: Get the historic data for the given process_id.
2427
+ >>> fs.get_data(process_id=fp.process_id, include_historic_records=True)
2428
+ accounts Feb Jan
2429
+ 0 Blue Inc 9000.0 5000
2430
+ 1 Blue Inc 90.0 50
2431
+ 2 Blue Inc 90.0 5000
2432
+ 3 Blue Inc 900.0 500
2433
+ 4 Blue Inc 900.0 5000
2434
+ 5 Blue Inc 900.0 50
2435
+ 6 Blue Inc 90.0 500
2436
+ 7 Blue Inc 9000.0 50
2437
+ 8 Blue Inc 9000.0 500
2438
+
2439
+ # Example 9: Get the latest data for the given feature.
2440
+ >>> fs.get_data(entity='accounts', features={'Feb': fp.process_id}, include_historic_records=False)
2441
+ accounts Feb
2442
+ 0 Blue Inc 9000.0
2443
+
2444
+ # Example 10: Get the historic data for the given feature.
2445
+ >>> fs.get_data(entity='accounts', features={'Feb': fp.process_id}, include_historic_records=True)
2446
+ accounts Feb
2447
+ 0 Blue Inc 900.0
2448
+ 1 Blue Inc 90.0
2449
+ 2 Blue Inc 9000.0
2450
+
2451
+ """
2452
+ # Validate argument types
2453
+ args = []
2454
+ args.append(["process_id", process_id, True, (str), True])
2455
+ args.append(["entity", entity, True, (Entity, str), True])
2456
+ args.append(["features", features, True, (dict), True])
2457
+ args.append(["dataset_name", dataset_name, True, (str), True])
2458
+ args.append(["as_of", as_of, True, (str, dt), True])
2459
+ args.append(["include_historic_records", include_historic_records, True, (bool)])
2460
+
2461
+ _Validators._validate_function_arguments(args)
2462
+
2463
+ # Validate mutually exclusive arguments.
2464
+ _Validators._validate_mutually_exclusive_argument_groups({"process_id": process_id},
2465
+ {"dataset_name": dataset_name},
2466
+ {"entity": entity, "features": features})
2467
+
2468
+ # Validate whether entity and features are mutually inclusive.
2469
+ _Validators._validate_mutually_inclusive_arguments(entity, "entity",
2470
+ features, "features")
2471
+
2472
+ # Validate at least one argument is passed.
2473
+ _Validators._validate_any_argument_passed({"process_id": process_id,
2474
+ "entity' and 'features": entity,
2475
+ "dataset_name": dataset_name})
2476
+
2477
+ # If the user passes a dataset name, return the DataFrame directly.
2478
+ if dataset_name:
2479
+ return DataFrame(in_schema(self.__repo, dataset_name))
2480
+
2481
+ if process_id:
2482
+ entity, features = (
2483
+ self.__get_entity_and_features_from_process_id(process_id))
2484
+
2485
+ # Generate the view name.
2486
+ view_name = UtilFuncs._generate_temp_table_name(databasename=self.__repo)
2487
+
2488
+ # When as_of is not None, get all the data instead of only latest.
2489
+ if as_of:
2490
+ include_historic_records = True
2491
+
2492
+ # Create the DatasetCatalog and build dataset on top of it.
2493
+ dc = DatasetCatalog(repo=self.__repo, data_domain=self.__data_domain)
2494
+ dataset = dc._build_dataset(
2495
+ entity, features,
2496
+ include_historic_records=include_historic_records,
2497
+ include_time_series=True if as_of else False,
2498
+ view_name=view_name,
2499
+ temporary=True)
2500
+
2501
+ if as_of:
2502
+ return self.__filter_dataset_by_as_of(dataset, entity, list(features.keys()), as_of)
2503
+ return dataset
2504
+
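+ # A minimal sketch of the validations above: the three dataset sources
+ # are mutually exclusive, and 'entity'/'features' are mutually inclusive.
+ #     fs.get_data(process_id=fp.process_id, dataset_name='test_get_data')
+ #     # rejected: process_id and dataset_name cannot be combined
+ #     fs.get_data(entity='accounts')
+ #     # rejected: 'features' must accompany 'entity'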
2505
+ def __get_entity_and_features_from_process_id(self, process_id):
1450
2506
  """
1451
2507
  DESCRIPTION:
1452
- Internal function to get the associated group names for
1453
- Feature or DataSource OR Entity.
2508
+ Internal function to get entity_columns, feature_columns, and
2509
+ selected_features using process_id.
1454
2510
 
1455
2511
  PARAMETERS:
1456
- name:
2512
+ process_id:
1457
2513
  Required Argument.
1458
- Specifies the name of the Feature or DataSource or Entity.
2514
+ Specifies the process id of FeatureProcess.
1459
2515
  Types: str
1460
2516
 
1461
- type_:
1462
- Required Argument.
1463
- Specifies the type of the objects stored in feature store.
1464
- Permitted Values:
1465
- * feature
1466
- * data_source
1467
- * entity
1468
- Types: str
1469
-
1470
2517
  RETURNS:
1471
- list
2518
+ entity_id, selected_features
1472
2519
 
1473
2520
  RAISES:
1474
2521
  None
1475
2522
 
1476
2523
  EXAMPLES:
1477
- >>> self.__get_feature_group_names('admissions', 'data_source')
2524
+ >>> fs.__get_entity_and_features_from_process_id('123-acd')
1478
2525
  """
1479
- if type_ == "feature":
1480
- df = self.__get_features_df()
1481
- return [rec.group_name for rec in df[df.name == name].itertuples() if rec.group_name is not None]
1482
- elif type_ == "data_source":
1483
- df = self.__get_feature_group_df()
1484
- return [rec.name for rec in df[df.data_source_name == name].itertuples()]
1485
- elif type_ == "entity":
1486
- df = self.__get_feature_group_df()
1487
- return [rec.name for rec in df[df.entity_name == name].itertuples()]
2526
+ feature_ver = self.__get_feature_version()
2527
+ feature_ver = feature_ver[feature_ver["feature_version"] == process_id]
2528
+
2529
+ # Check if a feature with that process id exists or not. If not, raise error.
2530
+ if feature_ver.shape[0] == 0:
2531
+ res = _FSUtils._get_data_domains(self.__repo, process_id, 'feature_version')
2532
+ if res:
2533
+ msg_code = MessageCodes.EFS_OBJECT_IN_OTHER_DOMAIN
2534
+ error_msg = Messages.get_message(msg_code, "Feature", "process id '{}'".format(process_id),
2535
+ self.__data_domain, res)
2536
+ else:
2537
+ msg_code = MessageCodes.EFS_OBJECT_NOT_EXIST
2538
+ error_msg = Messages.get_message(msg_code, "Feature", "process id '{}'".format(process_id),
2539
+ self.__data_domain)
2540
+ raise TeradataMlException(error_msg, msg_code)
1488
2541
 
1489
- def __remove_obj(self, name, type_, action="archive"):
2542
+ selected_features = {}
2543
+ for f_ver in feature_ver.itertuples():
2544
+ entity_id = f_ver.entity_id
2545
+ selected_features[f_ver.feature_name] = process_id
2546
+ return entity_id, selected_features
2547
+
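+ # Shape of the return value above, for orientation (values are
+ # hypothetical): entity_id -> 'accounts', and
+ # selected_features -> {'Jan': '<process_id>', 'Feb': '<process_id>'}.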
2548
+ def __filter_dataset_by_as_of(self, dataset, entity_column, features_column_list, as_of):
1490
2549
  """
1491
2550
  DESCRIPTION:
1492
- Internal function to get the remove Feature or DataSource OR
1493
- Entity from repo.
2551
+ Internal function to filter the dataset using as_of and
2552
+ return only required columns.
1494
2553
 
1495
2554
  PARAMETERS:
1496
- name:
2555
+ dataset:
1497
2556
  Required Argument.
1498
- Specifies the name of the Feature or DataSource or Entity.
2557
+ Specifies the teradataml DataFrame.
2558
+ Types: teradataml DataFrame
2559
+
2560
+ entity_column:
2561
+ Required Argument.
2562
+ Specifies the column name of entity.
1499
2563
  Types: str
1500
2564
 
1501
- type_:
1502
- Required Argument.
1503
- Specifies the type of "name".
2565
+ features_column_list:
2566
+ Required Argument.
2567
+ Specifies the list of feature column names.
2568
+ Types: list of str
2569
+
2570
+ as_of:
2571
+ Required Argument.
2572
+ Specifies the time to retrieve the Feature Values instead of
2573
+ retrieving the latest values.
2574
+ Notes:
2575
+ * Applicable only when "process_id" is passed to the function.
2576
+ * Ignored when "dataset_name" is passed.
2577
+ Types: str or datetime.datetime
2578
+
2579
+ RETURNS:
2580
+ teradataml DataFrame
2581
+
2582
+ RAISES:
2583
+ None
2584
+
2585
+ EXAMPLES:
2586
+ >>> load_example_data("dataframe", "sales")
2587
+ >>> df = DataFrame("sales")
2588
+ >>> fs.__filter_dataset_by_as_of(df, "accounts", ["Jan", "Feb"], datetime.datetime(2025, 1, 1))
2589
+
2590
+ """
2591
+ conditions = [
2592
+ (dataset[f"{f}_start_time"] <= as_of) & (as_of <= dataset[f"{f}_end_time"])
2593
+ for f in features_column_list
2594
+ ]
2595
+ combined_condition = reduce(operator.and_, conditions)
2596
+ required_columns = UtilFuncs._as_list(entity_column) + features_column_list
2597
+ return dataset[combined_condition].select(required_columns)
2598
+
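+ # For orientation, the reduce() above simply ANDs one validity-window
+ # predicate per feature; a hand-expanded sketch for two features,
+ # following the f"{f}_start_time" column naming used above:
+ #     cond = ((dataset.Jan_start_time <= as_of) &
+ #             (as_of <= dataset.Jan_end_time) &
+ #             (dataset.Feb_start_time <= as_of) &
+ #             (as_of <= dataset.Feb_end_time))
+ #     dataset[cond].select(["accounts", "Jan", "Feb"])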
2599
+ def __get_feature_group_names(self, name, type_):
2600
+ """
2601
+ DESCRIPTION:
2602
+ Internal function to get the associated group names for
2603
+ Feature or DataSource OR Entity.
2604
+
2605
+ PARAMETERS:
2606
+ name:
2607
+ Required Argument.
2608
+ Specifies the name of the Feature or DataSource or Entity.
2609
+ Types: str
2610
+
2611
+ type_:
2612
+ Required Argument.
2613
+ Specifies the type of the objects stored in feature store.
2614
+ Permitted Values:
2615
+ * feature
2616
+ * data_source
2617
+ * entity
2618
+ Types: str
2619
+
2620
+ RETURNS:
2621
+ list
2622
+
2623
+ RAISES:
2624
+ None
2625
+
2626
+ EXAMPLES:
2627
+ >>> self.__get_feature_group_names('admissions', 'data_source')
2628
+ """
2629
+ if type_ == "feature":
2630
+ df = self.__get_features_df()
2631
+ return [rec.group_name for rec in df[df.name == name].itertuples() if rec.group_name is not None]
2632
+ elif type_ == "data_source":
2633
+ df = self.__get_feature_group_df()
2634
+ return [rec.name for rec in df[df.data_source_name == name].itertuples()]
2635
+ elif type_ == "entity":
2636
+ df = self.__get_feature_group_df()
2637
+ return [rec.name for rec in df[df.entity_name == name].itertuples()]
2638
+
2639
+ def __remove_obj(self, name, type_, action="archive"):
2640
+ """
2641
+ DESCRIPTION:
2642
+ Internal function to get the remove Feature or DataSource OR
2643
+ Entity from repo.
2644
+
2645
+ PARAMETERS:
2646
+ name:
2647
+ Required Argument.
2648
+ Specifies the name of the Feature or DataSource or Entity.
2649
+ Types: str
2650
+
2651
+ type_:
2652
+ Required Argument.
2653
+ Specifies the type of "name".
1504
2654
  Types: str
1505
2655
  Permitted Values:
1506
2656
  * feature
@@ -1539,6 +2689,9 @@ class FeatureStore:
1539
2689
  if isinstance(name, _vars[type_]["class"]):
1540
2690
  name = name.name
1541
2691
 
2692
+ # Get the feature info DataFrame.
2693
+ feature_info_df = self.__get_feature_info_df()
2694
+
1542
2695
  # Before removing it, check if it is associated with any FeatureGroup.
1543
2696
  # If yes, raise error. Applicable only for Archive.
1544
2697
  if action == "archive":
@@ -1550,6 +2703,47 @@ class FeatureStore:
1550
2703
  raise TeradataMlException(Messages.get_message(
1551
2704
  MessageCodes.FUNC_EXECUTION_FAILED, '{}_{}'.format(action, type_), message),
1552
2705
  MessageCodes.FUNC_EXECUTION_FAILED)
2706
+ # Check if the feature or entity exists in the feature metadata table.
2707
+ # If yes, raise an error. Applicable only for the archive action.
2708
+ info_checks = {
2709
+ 'feature': ('name', MessageCodes.EFS_FEATURE_IN_CATALOG),
2710
+ 'entity': ('entity_name', MessageCodes.EFS_ENTITY_IN_CATALOG)
2711
+ }
2712
+ if type_ in info_checks:
2713
+ col, error_code = info_checks[type_]
2714
+ validate_df = feature_info_df[feature_info_df[col].isin([name])]
2715
+ if validate_df.shape[0] > 0:
2716
+ if type_ == "entity":
2717
+ related_features = [feature.name for feature in validate_df.itertuples()]
2718
+ features = ", ".join(("'{}'".format(f) for f in related_features))
2719
+ err_msg = Messages.get_message(error_code,
2720
+ name,
2721
+ features)
2722
+ else:
2723
+ err_msg = Messages.get_message(error_code,
2724
+ name)
2725
+ raise TeradataMlException(err_msg, error_code)
2726
+
2727
+ stg_table = _FeatureStoreDFContainer.get_df("{}_staging".format(type_), self.__repo, self.__data_domain)
2728
+ stg_table = stg_table[stg_table.name == name]
2729
+ if stg_table.shape[0] > 0:
2730
+ print("{} '{}' is already archived.".format(c_name_, name))
2731
+ return False
2732
+
2733
+ # Validation for delete action - ensure object is already archived
2734
+ if action == "delete":
2735
+ # Check if object exists in main table (not archived)
2736
+ main_table_name = self.__table_names[type_]
2737
+ main_df = _FeatureStoreDFContainer.get_df(type_, self.__repo, self.__data_domain)
2738
+ existing_records = main_df[(main_df["name"] == name)]
2739
+
2740
+ if existing_records.shape[0] > 0:
2741
+ error_code = MessageCodes.EFS_DELETE_BEFORE_ARCHIVE
2742
+ error_msg = Messages.get_message(error_code,
2743
+ c_name_,
2744
+ name,
2745
+ type_)
2746
+ raise TeradataMlException(error_msg, error_code)
1553
2747
 
1554
2748
  if type_ == "entity":
1555
2749
  res = self._remove_entity(name, action)
@@ -1560,7 +2754,8 @@ class FeatureStore:
1560
2754
 
1561
2755
  res = _delete_data(table_name=table_name,
1562
2756
  schema_name=self.__repo,
1563
- delete_conditions=(Col("name") == name)
2757
+ delete_conditions=(Col("name") == name) &
2758
+ (Col("data_domain") == self.__data_domain)
1564
2759
  )
1565
2760
 
1566
2761
  if res == 1:
@@ -1607,13 +2802,15 @@ class FeatureStore:
1607
2802
  # remove it from xref table first.
1608
2803
  _delete_data(table_name=ent_table_xref,
1609
2804
  schema_name=self.__repo,
1610
- delete_conditions=(Col("entity_name") == name)
2805
+ delete_conditions=(Col("entity_name") == name) &
2806
+ (Col("data_domain") == self.__data_domain)
1611
2807
  )
1612
2808
 
1613
2809
  # remove from entity table.
1614
2810
  res = _delete_data(table_name=ent_table,
1615
2811
  schema_name=self.__repo,
1616
- delete_conditions=(Col("name") == name)
2812
+ delete_conditions=(Col("name") == name) &
2813
+ (Col("data_domain") == self.__data_domain)
1617
2814
  )
1618
2815
 
1619
2816
  return res
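A recurring change in this hunk is that every delete condition now carries an extra `& (Col("data_domain") == self.__data_domain)` term. A small illustrative sketch (plain Python rows, not the library's Col expressions) of why that scoping term matters:

    # Without the data_domain term, deleting by name would hit every domain.
    rows = [{"name": "sales_data", "data_domain": "ALICE"},
            {"name": "sales_data", "data_domain": "BOB"}]

    name, data_domain = "sales_data", "ALICE"
    survivors = [r for r in rows
                 if not (r["name"] == name and r["data_domain"] == data_domain)]
    print(survivors)  # only the BOB-domain row remains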
@@ -1623,7 +2820,7 @@ class FeatureStore:
  DESCRIPTION:
  Archives DataSource from repository. Note that archived DataSource
  is not available for any further processing. Archived DataSource can be
- viewed using "list_archived_data_sources()" method.
+ viewed using "list_data_sources(archived=True)" method.
 
  PARAMETERS:
  data_source:
@@ -1639,30 +2836,60 @@ class FeatureStore:
  TeradataMLException, TypeError, ValueError
 
  EXAMPLES:
- >>> from teradataml import DataSource, FeatureStore, load_example_data
- # Create a DataSource using SELECT statement.
- >>> ds = DataSource(name="sales_data", source="select * from sales")
+ >>> from teradataml import DataFrame, DataSource, FeatureStore
  # Create FeatureStore for repo 'vfs_v1'.
  >>> fs = FeatureStore("vfs_v1")
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+ # Setup FeatureStore for this repository.
+ >>> fs.setup()
+ True
+
+ # Example 1: Archive the DataSource 'sales_data' in the repo 'vfs_v1' using DataSource object.
+ # Create a DataSource using SELECT statement.
+ >>> ds = DataSource(name="sales_data", source="select * from sales")
  # Apply DataSource to FeatureStore.
  >>> fs.apply(ds)
  True
+
  # List the available DataSources.
  >>> fs.list_data_sources()
- description timestamp_col_name source
- name
- sales_data None None select * from sales
+ description timestamp_column source creation_time modified_time
+ name data_domain
+ sales_data ALICE None None select * from sales 2025-07-28 04:24:48.117827 None
 
  # Archive DataSource with name "sales_data".
  >>> fs.archive_data_source("sales_data")
  DataSource 'sales_data' is archived.
  True
- >>>
+
  # List the available DataSources after archive.
- >>> fs.list_data_sources()
- Empty DataFrame
- Columns: [description, timestamp_col_name, source]
- Index: []
+ >>> fs.list_data_sources(archived=True)
+ name data_domain description timestamp_column source creation_time modified_time archived_time
+ 0 sales_data ALICE None None select * from sales 2025-07-28 04:24:48.117827 None 2025-07-28 04:25:55.430000
+
+ # Example 2: Archive the DataSource 'sales_data_df' in the repo 'vfs_v1' using DataSource name.
+ # Create a DataSource using teradataml DataFrame.
+ >>> from teradataml import DataFrame
+ >>> load_example_data('dataframe', ['sales'])
+ >>> df = DataFrame("sales")
+ >>> ds2 = DataSource(name="sales_data_df", source=df)
+
+ # Apply DataSource to FeatureStore.
+ >>> fs.apply(ds2)
+ True
+
+ # Archive DataSource with name "sales_data_df".
+ >>> fs.archive_data_source("sales_data_df")
+ DataSource 'sales_data_df' is archived.
+ True
+
+ # List the available DataSources after archive.
+ >>> fs.list_data_sources(archived=True)
+ name data_domain description timestamp_column source creation_time modified_time archived_time
+ 0 sales_data ALICE None None select * from sales 2025-07-28 04:24:48.117827 None 2025-07-28 04:25:55.430000
+ 1 sales_data_df ALICE None None select * from sales 2025-07-28 04:26:10.123456 None 2025-07-28 04:26:45.456789
+
+
  """
  return self.__remove_obj(name=data_source, type_="data_source")
 
@@ -1686,16 +2913,23 @@ class FeatureStore:
 
  EXAMPLES:
  >>> from teradataml import DataFrame, DataSource, FeatureStore, load_example_data
- >>> load_example_data('dataframe', ['sales'])
  # Create teradataml DataFrame.
+ >>> load_example_data('dataframe', ['sales'])
  >>> df = DataFrame("sales")
+
+ # Create FeatureStore for repo 'vfs_v1'.
+ >>> fs = FeatureStore("vfs_v1")
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+ # Setup FeatureStore for this repository.
+ >>> fs.setup()
+ True
+
  # Create DataSource with source as teradataml DataFrame.
  >>> ds = DataSource(name="sales_data", source=df)
- # # Create FeatureStore for repo 'vfs_v1'.
- >>> fs = FeatureStore("vfs_v1")
  # Apply the DataSource to FeatureStore.
  >>> fs.apply(ds)
  True
+
  # Let's first archive the DataSource.
  >>> fs.archive_data_source("sales_data")
  DataSource 'sales_data' is archived.
@@ -1705,7 +2939,12 @@ class FeatureStore:
  >>> fs.delete_data_source("sales_data")
  DataSource 'sales_data' is deleted.
  True
- >>>
+
+ # List the available DataSources after delete.
+ >>> fs.list_data_sources()
+ Empty DataFrame
+ Columns: [description, timestamp_column, source, creation_time, modified_time]
+ Index: []
  """
  return self.__remove_obj(name=data_source, type_="data_source", action="delete")
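The delete methods in this release enforce an archive-first lifecycle: `__remove_obj` raises EFS_DELETE_BEFORE_ARCHIVE when the object is still in the main table. A hypothetical sketch of that two-step state machine, with sets standing in for the main and staging tables:

    # Sketch only: main/staging sets stand in for the repo and staging tables.
    main, staging = {"sales_data"}, set()

    def archive(name):
        main.discard(name)   # move the object out of the active table...
        staging.add(name)    # ...into the staging (archive) area

    def delete(name):
        if name in main:
            # mirrors the EFS_DELETE_BEFORE_ARCHIVE error raised above
            raise RuntimeError("archive '{}' before deleting it".format(name))
        staging.discard(name)

    archive("sales_data")  # main -> staging
    delete("sales_data")   # now allowed; removed from staging
    print(main, staging)   # set() set()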
 
@@ -1714,7 +2953,7 @@ class FeatureStore:
  DESCRIPTION:
  Archives Feature from repository. Note that archived Feature
  is not available for any further processing. Archived Feature can be
- viewed using "list_archived_features()" method.
+ viewed using "list_features(archived=True)" method.
 
  PARAMETERS:
  feature:
@@ -1731,36 +2970,62 @@ class FeatureStore:
 
  EXAMPLES:
  >>> from teradataml import DataFrame, Feature, FeatureStore
- >>> load_example_data('dataframe', ['sales'])
  # Create teradataml DataFrame.
+ >>> load_example_data('dataframe', ['sales'])
  >>> df = DataFrame("sales")
+
+ # Create FeatureStore for repo 'vfs_v1'.
+ >>> fs = FeatureStore("vfs_v1")
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+ # Setup FeatureStore for this repository.
+ >>> fs.setup()
+ True
+
+ # Example 1: Archive the Feature 'sales_data_Feb' in the repo 'vfs_v1' using Feature object.
2985
  # Create Feature for Column 'Feb'.
1738
2986
  >>> feature = Feature(name="sales_data_Feb", column=df.Feb)
1739
- # Create FeatureStore for the repo 'staging_repo'.
1740
- >>> fs = FeatureStore("staging_repo")
1741
2987
  # Apply the Feature to FeatureStore.
1742
2988
  >>> fs.apply(feature)
1743
2989
  True
2990
+
1744
2991
  # List the available Features.
1745
2992
  >>> fs.list_features()
1746
- column_name description creation_time modified_time tags data_type feature_type status group_name
1747
- name
1748
- sales_data_Feb Feb None 2024-10-03 18:21:03.720464 None None FLOAT CONTINUOUS ACTIVE None
2993
+ id column_name description tags data_type feature_type status creation_time modified_time group_name
2994
+ name data_domain
2995
+ sales_data_Feb ALICE 1 Feb None None FLOAT CONTINUOUS ACTIVE 2025-07-28 04:41:01.641026 None None
1749
2996
 
1750
2997
  # Archive Feature with name "sales_data_Feb".
1751
2998
  >>> fs.archive_feature(feature=feature)
1752
2999
  Feature 'sales_data_Feb' is archived.
1753
3000
  True
1754
- # List the available Features after archive.
1755
- >>> fs.list_features()
1756
- Empty DataFrame
1757
- Columns: [column_name, description, creation_time, modified_time, tags, data_type, feature_type, status, group_name]
1758
- Index: []
1759
- >>>
3001
+
3002
+ # List the available archived Features.
3003
+ >>> fs.list_features(archived=True)
3004
+ id name data_domain column_name description tags data_type feature_type status creation_time modified_time archived_time group_name
3005
+ 0 1 sales_data_Feb ALICE Feb None None FLOAT CONTINUOUS ACTIVE 2025-07-28 04:41:01.641026 None 2025-07-28 04:41:35.600000 None
3006
+
3007
+ # Example 2: Archive the Feature 'sales_data_Jan' in the repo 'vfs_v1' using feature name.
+ # Create Feature for Column 'Jan'.
+ >>> feature2 = Feature(name="sales_data_Jan", column=df.Jan)
+ # Apply the Feature to FeatureStore.
+ >>> fs.apply(feature2)
+ True
+
+ # Archive Feature with name "sales_data_Jan".
+ >>> fs.archive_feature(feature="sales_data_Jan")
+ Feature 'sales_data_Jan' is archived.
+ True
+
+ # List the available archived Features.
+ >>> fs.list_features(archived=True)
+ id name data_domain column_name description tags data_type feature_type status creation_time modified_time archived_time group_name
+ 0 1 sales_data_Feb ALICE Feb None None FLOAT CONTINUOUS ACTIVE 2025-07-28 04:41:01.641026 None 2025-07-28 04:41:35.600000 None
+ 1 2 sales_data_Jan ALICE Jan None None FLOAT CONTINUOUS ACTIVE 2025-07-28 04:42:01.641026 None 2025-07-28 04:43:35.600000 None
+
  """
  return self.__remove_obj(name=feature, type_="feature")
 
1763
- def delete(self):
3028
+ def delete(self, force=False):
1764
3029
  """
1765
3030
  DESCRIPTION:
1766
3031
  Removes the FeatureStore and its components from repository.
@@ -1777,7 +3042,15 @@ class FeatureStore:
1777
3042
  to not use this function.
1778
3043
 
1779
3044
  PARAMETERS:
1780
- None
3045
+ force:
3046
+ Optional Argument.
3047
+ Specifies whether to forcefully delete feature store or not.
3048
+ When set to True, delete() method proceeds to drop objects
3049
+ even if previous step is errored. Otherwise, delete() method
3050
+ raises the exception at the first error and do not proceed to
3051
+ remove other objects.
3052
+ Defaults: False
3053
+ Types: bool
 
  RETURNS:
  bool.
@@ -1789,23 +3062,36 @@ class FeatureStore:
  # Setup FeatureStore for repo 'vfs_v1'.
  >>> from teradataml import FeatureStore
  >>> fs = FeatureStore("vfs_v1")
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+
+ # Setup FeatureStore.
  >>> fs.setup()
  True
- >>> # Delete FeatureStore.
+
+ # Delete the FeatureStore and all its components.
  >>> fs.delete()
+ The function removes Feature Store and drops the corresponding repo also. Are you sure you want to proceed? (Y/N): Y
+ True
+
+ # Forcefully delete the FeatureStore and all its components.
+ >>> fs.delete(force=True)
+ The function removes Feature Store and drops the corresponding repo also. Are you sure you want to proceed? (Y/N): Y
  True
- >>>
  """
+ _args = []
+ _args.append(["force", force, True, (bool)])
+ # Validate argument types
+ _Validators._validate_function_arguments(_args)
+
  confirmation = input("The function removes Feature Store and drops the "
  "corresponding repo also. Are you sure you want to proceed? (Y/N): ")
 
  if confirmation in ["Y", "y"]:
- return self.__drop_feature_store_objects(self.__repo)
+ return self.__drop_feature_store_objects(force=force)
 
  return False
 
- @staticmethod
- def __drop_feature_store_objects(repo_name):
+ def __drop_feature_store_objects(self, force=False):
  """
  DESCRIPTION:
  Removes the FeatureStore and its components from repository.
@@ -1816,37 +3102,77 @@ class FeatureStore:
  Specifies the name of the repository.
  Types: str
 
+ force:
+ Optional Argument.
+ Specifies whether to forcefully delete the feature store or not.
+ When set to True, the delete() method proceeds to drop objects
+ even if a previous step errors. Otherwise, delete() raises
+ an exception at the first error and does not proceed to
+ remove other objects.
+ Defaults: False.
+ Types: bool
+
  RETURNS:
  bool
  """
  # Drop all the tables and staging tables.
  tables_ = [
- EFS_GROUP_FEATURES_SPEC["table_name"],
- EFS_FEATURE_GROUP_SPEC["table_name"],
- EFS_FEATURES_SPEC['table_name'],
- EFS_ENTITY_XREF_SPEC['table_name'],
- EFS_ENTITY_SPEC["table_name"],
- EFS_DATA_SOURCE_SPEC["table_name"]
+ self.__table_names["group_features"],
+ self.__table_names["feature_group"],
+ self.__table_names['feature'],
+ self.__table_names['entity_xref'],
+ self.__table_names['entity'],
+ self.__table_names['data_source'],
+ self.__table_names['feature_process'],
+ self.__table_names['feature_runs'],
+ self.__table_names['feature_metadata'],
+ self.__table_names['dataset_catalog'],
+ self.__table_names['dataset_features'],
+ self.__table_names['data_domain'],
+ self.__table_names['version']
  ]
 
  tables_stg_ = [
- EFS_FEATURES_STAGING_SPEC['table_name'],
- EFS_ENTITY_STAGING_SPEC["table_name"],
- EFS_ENTITY_XREF_STAGING_SPEC["table_name"],
- EFS_DATA_SOURCE_STAGING_SPEC["table_name"],
- EFS_FEATURE_GROUP_STAGING_SPEC["table_name"],
- EFS_GROUP_FEATURES_STAGING_SPEC["table_name"]
+ self.__table_names['feature_staging'],
+ self.__table_names["entity_staging"],
+ self.__table_names["entity_staging_xref"],
+ self.__table_names["data_source_staging"],
+ self.__table_names["feature_group_staging"],
+ self.__table_names["group_features_staging"]
  ]
 
  # Drop all the triggers first. So that tables can be dropped.
- triggers = ["{}_trg".format(table) for table in tables_]
- for trigger in triggers:
- execute_sql("drop trigger {}.{}".format(repo_name, trigger))
-
- for table in (tables_ + [EFS_VERSION_SPEC["table_name"]] + tables_stg_):
- db_drop_table(table, schema_name=repo_name)
-
- execute_sql("DROP DATABASE {}".format(repo_name))
+ ignr_errors = 'all' if force else None
+ for trigger in EFS_TRIGGERS.values():
+ execute_sql("drop trigger {}.{}".format(self.__repo, trigger),
+ ignore_errors=ignr_errors)
+
+ # Drop the views first.
+ views_ = [EFS_DB_COMPONENTS['feature_version']]
+ for view in views_:
+ db_drop_view(view, schema_name=self.__repo, suppress_error=force)
+
+ # Drop datasets.
+ # Used EFS_DB_COMPONENTS['dataset_catalog'] because it contains all the datasets.
+ # The get_df methods are filtered by data_domain, hence they don't show all datasets.
+ for dataset in DataFrame(in_schema(self.__repo, EFS_DB_COMPONENTS['dataset_catalog'])).itertuples():
+ db_drop_view(dataset.name, schema_name=self.__repo, suppress_error=force)
+
+ # Drop all the Feature tables.
+ dropped_tab = set()
+ # Used EFS_DB_COMPONENTS['feature_metadata'] because it contains all the features.
+ # The get_df methods are filtered by data_domain, hence they don't show all features.
+ for rec in DataFrame(in_schema(self.__repo, EFS_DB_COMPONENTS['feature_metadata'])).itertuples():
+ # Avoid dropping the same table again.
+ dropped_tab.add(rec.table_name)
+
+ for table in dropped_tab:
+ db_drop_table(table, schema_name=self.__repo, suppress_error=force)
+
+ for table in (tables_ + tables_stg_):
+ db_drop_table(table, schema_name=self.__repo, suppress_error=force)
+
+ execute_sql(f"DROP DATABASE {self.__repo}")
 
  return True
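The `force` flag above simply switches the teardown between fail-fast and best-effort: errors from individual drops are either raised immediately or suppressed so the loop keeps going. A hypothetical sketch of that control flow (drop_object stands in for the db_drop_table / db_drop_view calls):

    # Sketch only: drop_object is a hypothetical stand-in.
    def drop_object(name):
        if name == "missing_table":
            raise RuntimeError("object does not exist: " + name)

    def drop_all(objects, force=False):
        for obj in objects:
            try:
                drop_object(obj)
            except RuntimeError:
                if not force:
                    raise   # fail fast: stop at the first error
                # force=True: swallow the error and keep dropping the rest

    drop_all(["t1", "missing_table", "t2"], force=True)  # completes
    # drop_all(["t1", "missing_table", "t2"])            # would raise

Fail-fast remains the default, which matches the documented behavior of raising at the first error unless force=True is passed.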
 
@@ -1870,16 +3196,30 @@ class FeatureStore:
 
  EXAMPLES:
  >>> from teradataml import DataFrame, Feature, FeatureStore
- >>> load_example_data('dataframe', ['sales'])
  # Create teradataml DataFrame.
+ >>> load_example_data('dataframe', ['sales'])
  >>> df = DataFrame("sales")
+
+ # Create FeatureStore for repo 'vfs_v1'.
+ >>> fs = FeatureStore("vfs_v1")
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+ # Setup FeatureStore for this repository.
+ >>> fs.setup()
+ True
+
+ # Example 1: Delete the Feature 'sales_data_Feb' in the repo 'vfs_v1' using Feature object.
  # Create Feature for Column 'Feb'.
  >>> feature = Feature(name="sales_data_Feb", column=df.Feb)
- # Create a feature store with name "staging_repo".
- >>> fs = FeatureStore("staging_repo")
  # Add the feature created above in the feature store.
  >>> fs.apply(feature)
  True
+
+ # List the available Features.
+ >>> fs.list_features()
+ id column_name description tags data_type feature_type status creation_time modified_time group_name
+ name data_domain
+ sales_data_Feb ALICE 1 Feb None None FLOAT CONTINUOUS ACTIVE 2025-07-28 04:49:55.827391 None None
+
  # Let's first archive the Feature.
  >>> fs.archive_feature(feature=feature)
  Feature 'sales_data_Feb' is archived.
@@ -1889,7 +3229,35 @@ class FeatureStore:
  >>> fs.delete_feature(feature=feature)
  Feature 'sales_data_Feb' is deleted.
  True
- >>>
+
+ # List the available Features after delete.
+ >>> fs.list_features()
+ Empty DataFrame
+ Columns: [id, column_name, description, tags, data_type, feature_type, status, creation_time, modified_time, group_name]
+ Index: []
+
+ # Example 2: Delete the Feature 'sales_data_Jan' in the repo 'vfs_v1' using feature name.
+ # Create Feature for Column 'Jan'.
+ >>> feature2 = Feature(name="sales_data_Jan", column=df.Jan)
+ # Add the feature created above in the feature store.
+ >>> fs.apply(feature2)
+ True
+
+ # List the available Features.
+ >>> fs.list_features()
+ id column_name description tags data_type feature_type status creation_time modified_time group_name
+ name data_domain
+ sales_data_Jan ALICE 2 Jan None None FLOAT CONTINUOUS ACTIVE 2025-07-28 04:50:55.827391 None None
+
+ # Let's first archive the Feature using feature name.
+ >>> fs.archive_feature(feature="sales_data_Jan")
+ Feature 'sales_data_Jan' is archived.
+ True
+
+ # Delete Feature with name "sales_data_Jan".
+ >>> fs.delete_feature(feature="sales_data_Jan")
+ Feature 'sales_data_Jan' is deleted.
+ True
  """
  return self.__remove_obj(name=feature, type_="feature", action="delete")
 
@@ -1898,7 +3266,7 @@ class FeatureStore:
  DESCRIPTION:
  Archives Entity from repository. Note that archived Entity
  is not available for any further processing. Archived Entity can be
- viewed using "list_archived_entities()" method.
+ viewed using "list_entities(archived=True)" method.
 
  PARAMETERS:
  entity:
@@ -1915,31 +3283,58 @@ class FeatureStore:
 
  EXAMPLES:
  >>> from teradataml import DataFrame, Entity, FeatureStore
- >>> load_example_data('dataframe', ['sales'])
  # Create teradataml DataFrame.
+ >>> load_example_data('dataframe', ['sales'])
  >>> df = DataFrame("sales")
+
+ # Create FeatureStore for repo 'vfs_v1'.
+ >>> fs = FeatureStore("vfs_v1")
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+ # Setup FeatureStore for this repository.
+ >>> fs.setup()
+ True
+
+ # Example 1: Archive the Entity 'sales_data' in the repo 'vfs_v1' using Entity name.
  # Create Entity using teradataml DataFrame Column.
  >>> entity = Entity(name="sales_data", columns=df.accounts)
- # Create FeatureStore for repo 'staging_repo'.
- >>> fs = FeatureStore("staging_repo")
  # Apply the entity to FeatureStore.
  >>> fs.apply(entity)
  True
+
  # List all the available entities.
  >>> fs.list_entities()
- description
- name entity_column
- sales_data accounts None
+ description creation_time modified_time entity_column
+ name data_domain
+ sales_data ALICE None 2025-07-28 04:54:34.687139 None accounts
 
  # Archive Entity with name "sales_data".
  >>> fs.archive_entity(entity=entity.name)
  Entity 'sales_data' is archived.
  True
+
  # List the entities after archive.
- >>> fs.list_entities()
- Empty DataFrame
- Columns: [description]
- Index: []
+ >>> fs.list_entities(archived=True)
+ name data_domain description creation_time modified_time archived_time entity_column
+ 0 sales_data ALICE None 2025-07-28 04:54:34.687139 None 2025-07-28 04:55:46.750000 accounts
+
+ # Example 2: Archive the Entity 'sales_data_df' in the repo 'vfs_v1' using Entity object.
+ # Create Entity using teradataml DataFrame Column.
+ >>> entity2 = Entity(name="sales_data_df", columns=df.accounts)
+ # Apply the entity to FeatureStore.
+ >>> fs.apply(entity2)
+ True
+
+ # Archive Entity with Entity object.
+ >>> fs.archive_entity(entity=entity2)
+ Entity 'sales_data_df' is archived.
+ True
+
+ # List the entities after archive.
+ >>> fs.list_entities(archived=True)
+ name data_domain description creation_time modified_time archived_time entity_column
+ 0 sales_data ALICE None 2025-07-28 04:54:34.687139 None 2025-07-28 04:55:46.750000 accounts
+ 1 sales_data_df ALICE None 2025-07-28 04:56:01.123456 None 2025-07-28 04:57:35.456789 accounts
+
  """
  return self.__remove_obj(name=entity, type_="entity")
 
@@ -1963,16 +3358,30 @@ class FeatureStore:
 
  EXAMPLES:
  >>> from teradataml import DataFrame, Entity, FeatureStore
- >>> load_example_data('dataframe', ['sales'])
  # Create teradataml DataFrame.
+ >>> load_example_data('dataframe', ['sales'])
  >>> df = DataFrame("sales")
+
+ # Create FeatureStore for repo 'vfs_v1'.
+ >>> fs = FeatureStore("vfs_v1")
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+ # Setup FeatureStore for this repository.
+ >>> fs.setup()
+ True
+
+ # Example 1: Delete the Entity 'sales_data' in the repo 'vfs_v1' using Entity name.
  # Create Entity using teradataml DataFrame Column.
  >>> entity = Entity(name="sales_data", columns=df.accounts)
- # Create FeatureStore for repo 'staging_repo'.
- >>> fs = FeatureStore("staging_repo")
  # Apply the entity to FeatureStore.
  >>> fs.apply(entity)
  True
+
+ # List all the available entities.
+ >>> fs.list_entities()
+ description creation_time modified_time entity_column
+ name data_domain
+ sales_data ALICE None 2025-07-28 04:58:01.123456 None accounts
+
  # Let's first archive the entity.
  >>> fs.archive_entity(entity=entity.name)
  Entity 'sales_data' is archived.
@@ -1982,7 +3391,35 @@ class FeatureStore:
  >>> fs.delete_entity(entity=entity.name)
  Entity 'sales_data' is deleted.
  True
- >>>
+
+ # List the entities after delete.
+ >>> fs.list_entities()
+ Empty DataFrame
+ Columns: [description, creation_time, modified_time, entity_column]
+ Index: []
+
+ # Example 2: Delete the Entity 'sales_data_df' in the repo 'vfs_v1' using Entity object.
+ # Create Entity using teradataml DataFrame Column.
+ >>> entity2 = Entity(name="sales_data_df", columns=df.accounts)
+ # Apply the entity to FeatureStore.
+ >>> fs.apply(entity2)
+ True
+
+ # List all the available entities.
+ >>> fs.list_entities()
+ description creation_time modified_time entity_column
+ name data_domain
+ sales_data_df ALICE None 2025-07-28 04:59:14.325456 None accounts
+
+ # Let's first archive the entity.
+ >>> fs.archive_entity(entity=entity2)
+ Entity 'sales_data_df' is archived.
+ True
+
+ # Delete Entity with Entity object.
+ >>> fs.delete_entity(entity=entity2)
+ Entity 'sales_data_df' is deleted.
+ True
  """
  return self.__remove_obj(name=entity, type_="entity", action="delete")
 
@@ -1993,7 +3430,7 @@ class FeatureStore:
  col_expr = Col("name") == features[0]
  for feature in features[1:]:
  col_expr = ((col_expr) | (Col("name") == feature))
-
+ col_expr = col_expr & (Col("data_domain") == self.__data_domain)
  return col_expr
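The where-clause builder above now produces expressions of the shape (name == f1 OR name == f2 ...) AND data_domain == current domain. A self-contained sketch of that predicate shape with plain functions in place of the library's Col expressions:

    # Sketch only: dict rows and closures stand in for Col expressions.
    def build_where(features, data_domain):
        def predicate(row):
            name_match = any(row["name"] == f for f in features)
            return name_match and row["data_domain"] == data_domain
        return predicate

    where = build_where(["Jan", "Feb"], "ALICE")
    print(where({"name": "Jan", "data_domain": "ALICE"}))  # True
    print(where({"name": "Jan", "data_domain": "BOB"}))    # False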
 
  def archive_feature_group(self, feature_group):
@@ -2001,7 +3438,7 @@ class FeatureStore:
  DESCRIPTION:
  Archives FeatureGroup from repository. Note that archived FeatureGroup
  is not available for any further processing. Archived FeatureGroup can be
- viewed using "list_archived_feature_groups()" method.
+ viewed using "list_feature_groups(archived=True)" method.
  Note:
  The function archives the associated Features, Entity and DataSource
  if they are not associated with any other FeatureGroups.
@@ -2021,32 +3458,57 @@ class FeatureStore:
 
  EXAMPLES:
  >>> from teradataml import DataFrame, FeatureGroup, FeatureStore
- >>> load_example_data('dataframe', ['sales'])
  # Create teradataml DataFrame.
+ >>> load_example_data('dataframe', ['sales'])
  >>> df = DataFrame("sales")
+
+ # Create FeatureStore for repo 'vfs_v1'.
+ >>> fs = FeatureStore("vfs_v1", data_domain="d1")
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+ # Setup FeatureStore for this repository.
+ >>> fs.setup()
+ True
+
+ # Example 1: Archive the FeatureGroup 'sales' in the repo 'vfs_v1' using FeatureGroup name.
  # Create FeatureGroup from teradataml DataFrame.
- >>> fg = FeatureGroup.from_DataFrame(name="sales", entity_columns="accounts", df=df, timestamp_col_name="datetime")
- # Create FeatureStore for the repo 'staging_repo'.
- >>> fs = FeatureStore("staging_repo")
+ >>> fg = FeatureGroup.from_DataFrame(name="sales", entity_columns="accounts", df=df, timestamp_column="datetime")
  # Apply FeatureGroup to FeatureStore.
  >>> fs.apply(fg)
  True
+
  # List all the available FeatureGroups.
  >>> fs.list_feature_groups()
- description data_source_name entity_name
- name
- sales None sales sales
+ description data_source_name entity_name creation_time modified_time
+ name data_domain
+ sales d1 None sales sales 2025-07-28 05:00:19.780453 None
 
  # Archive FeatureGroup with name "sales".
  >>> fs.archive_feature_group(feature_group='sales')
  FeatureGroup 'sales' is archived.
  True
- >>>
+
  # List all the available FeatureGroups after archive.
- >>> fs.list_feature_groups()
- Empty DataFrame
- Columns: [description, data_source_name, entity_name]
- Index: []
+ >>> fs.list_feature_groups(archived=True)
+ name data_domain description data_source_name entity_name creation_time modified_time archived_time
+ 0 sales d1 None sales sales 2025-07-28 05:00:19.780453 None 2025-07-28 05:02:04.100000
+
+ # Example 2: Archive the FeatureGroup 'sales_df' in the repo 'vfs_v1' using FeatureGroup object.
+ # Create FeatureGroup from teradataml DataFrame.
+ >>> fg2 = FeatureGroup.from_DataFrame(name="sales_df", entity_columns="accounts", df=df, timestamp_column="datetime")
+ # Apply FeatureGroup to FeatureStore.
+ >>> fs.apply(fg2)
+ True
+
+ # Archive FeatureGroup with FeatureGroup object.
+ >>> fs.archive_feature_group(feature_group=fg2)
+ FeatureGroup 'sales_df' is archived.
+ True
+
+ # List all the available FeatureGroups after archive.
+ >>> fs.list_feature_groups(archived=True)
+ name data_domain description data_source_name entity_name creation_time modified_time archived_time
+ 0 sales d1 None sales sales 2025-07-28 05:00:19.780453 None 2025-07-28 05:02:04.100000
+ 1 sales_df d1 None sales sales 2025-07-28 05:02:01.123456 None 2025-07-28 05:03:35.456789
  """
  argument_validation_params = []
  argument_validation_params.append(["feature_group", feature_group, False, (str, FeatureGroup), True])
@@ -2056,6 +3518,29 @@ class FeatureStore:
 
  feature_group_name = feature_group if isinstance(feature_group, str) else feature_group.name
 
+ stg_table = _FeatureStoreDFContainer.get_df("feature_group_staging", self.__repo, self.__data_domain)
+ stg_table = stg_table[stg_table.name == feature_group_name]
+ if stg_table.shape[0] > 0:
+ print("{} '{}' is already archived.".format('FeatureGroup', feature_group_name))
+ return False
+
+ # Check if FeatureGroup is related to any FeatureProcess
+ feature_process_df = self.list_feature_processes()
+ related_processes = feature_process_df[(feature_process_df['data_source'] == feature_group_name)]
+
+ if related_processes.shape[0] > 0:
+ process_ids = [fp.process_id for fp in related_processes.itertuples()]
+ related_process_ids = "feature process(es) {}".format(process_ids)
+ err_code = MessageCodes.EFS_OBJ_IN_FEATURE_PROCESS
+ err_msg = Messages.get_message(err_code,
+ 'FeatureGroup',
+ feature_group_name,
+ related_process_ids,
+ "feature process(es)",
+ "FeatureStore.archive_feature_process() and FeatureStore.delete_feature_process()",
+ )
+ raise TeradataMlException(err_msg, err_code)
+
  fg = self.get_feature_group(feature_group_name) if isinstance(feature_group, str) else feature_group
 
  fg_df = self.list_feature_groups()
@@ -2132,40 +3617,45 @@ class FeatureStore:
  # Remove data for FeatureGroup from Xref table.
  # This allows to remove data from other tables.
  res = _delete_data(schema_name=self.__repo,
- table_name=EFS_GROUP_FEATURES_SPEC["table_name"],
- delete_conditions=(Col("group_name") == group_name)
+ table_name=self.__table_names["group_features"],
+ delete_conditions=(Col("group_name") == group_name) &
+ (Col("group_data_domain") == self.__data_domain)
  )
 
  # Remove FeatureGroup.
  res = _delete_data(schema_name=self.__repo,
- table_name=EFS_FEATURE_GROUP_SPEC["table_name"],
- delete_conditions=(Col("name") == group_name)
+ table_name=self.__table_names["feature_group"],
+ delete_conditions=(Col("name") == group_name) &
+ (Col("data_domain") == self.__data_domain)
  )
 
  # Remove Features.
  if feature_names:
  _delete_data(schema_name=self.__repo,
- table_name=EFS_FEATURES_SPEC["table_name"],
+ table_name=self.__table_names["feature"],
  delete_conditions=self.__get_features_where_clause(feature_names)
  )
 
  # Remove entities.
  if entity_name:
  _delete_data(schema_name=self.__repo,
- table_name=EFS_ENTITY_XREF_SPEC["table_name"],
- delete_conditions=(Col("entity_name") == entity_name)
+ table_name=self.__table_names["entity_xref"],
+ delete_conditions=(Col("entity_name") == entity_name) &
+ (Col("data_domain") == self.__data_domain)
  )
 
  _delete_data(schema_name=self.__repo,
- table_name=EFS_ENTITY_SPEC["table_name"],
- delete_conditions=(Col("name") == entity_name)
+ table_name=self.__table_names["entity"],
+ delete_conditions=(Col("name") == entity_name) &
+ (Col("data_domain") == self.__data_domain)
  )
 
  # Remove DataSource.
  if data_source_name:
  _delete_data(schema_name=self.__repo,
- table_name=EFS_DATA_SOURCE_SPEC["table_name"],
- delete_conditions=(Col("name") == data_source_name),
+ table_name=self.__table_names["data_source"],
+ delete_conditions=(Col("name") == data_source_name) &
+ (Col("data_domain") == self.__data_domain)
  )
 
  return res
@@ -2195,17 +3685,31 @@ class FeatureStore:
 
  EXAMPLES:
  >>> from teradataml import DataFrame, FeatureGroup, FeatureStore
- >>> load_example_data('dataframe', ['sales'])
  # Create teradataml DataFrame.
+ >>> load_example_data('dataframe', ['sales'])
  >>> df = DataFrame("sales")
+
+ # Create FeatureStore for repo 'vfs_v1'.
+ >>> fs = FeatureStore("vfs_v1", data_domain="d1")
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+ # Setup FeatureStore for this repository.
+ >>> fs.setup()
+ True
+
+ # Example 1: Delete the FeatureGroup 'sales' in the repo 'vfs_v1' using FeatureGroup name.
  # Create FeatureGroup from teradataml DataFrame.
- >>> fg = FeatureGroup.from_DataFrame(name="sales", entity_columns="accounts", df=df, timestamp_col_name="datetime")
- # Create FeatureStore for the repo 'staging_repo'.
- >>> fs = FeatureStore("staging_repo")
+ >>> fg = FeatureGroup.from_DataFrame(name="sales", entity_columns="accounts", df=df, timestamp_column="datetime")
  # Apply FeatureGroup to FeatureStore.
  >>> fs.apply(fg)
  True
- # Let's first archive FeatureGroup with name "sales".
+
+ # List all the available FeatureGroups.
+ >>> fs.list_feature_groups()
+ description data_source_name entity_name creation_time modified_time
+ name data_domain
+ sales d1 None sales sales 2025-07-28 05:00:19.780453 None
+
+ # Archive FeatureGroup with name "sales".
  >>> fs.archive_feature_group(feature_group='sales')
  FeatureGroup 'sales' is archived.
  True
@@ -2214,7 +3718,29 @@ class FeatureStore:
  >>> fs.delete_feature_group(feature_group='sales')
  FeatureGroup 'sales' is deleted.
  True
- >>>
+
+ # List all the available FeatureGroups after delete.
+ >>> fs.list_feature_groups()
+ Empty DataFrame
+ Columns: [description, data_source_name, entity_name, creation_time, modified_time]
+ Index: []
+
+ # Example 2: Delete the FeatureGroup 'sales' in the repo 'vfs_v1' using FeatureGroup object.
+ # Create FeatureGroup from teradataml DataFrame.
+ >>> fg2 = FeatureGroup.from_DataFrame(name="sales", entity_columns="accounts", df=df, timestamp_column="datetime")
+ # Apply FeatureGroup to FeatureStore.
+ >>> fs.apply(fg2)
+ True
+
+ # Archive FeatureGroup with FeatureGroup object.
+ >>> fs.archive_feature_group(feature_group=fg2)
+ FeatureGroup 'sales' is archived.
+ True
+
+ # Delete FeatureGroup with FeatureGroup object.
+ >>> fs.delete_feature_group(feature_group=fg2)
+ FeatureGroup 'sales' is deleted.
+ True
  """
  argument_validation_params = []
  argument_validation_params.append(["feature_group", feature_group, False, (str, FeatureGroup), True])
@@ -2224,95 +3750,839 @@ class FeatureStore:
 
  fg_name = feature_group if isinstance(feature_group, str) else feature_group.name
 
+ # Validation for delete action - ensure FeatureGroup is already archived
+ main_fg_df = self.__get_feature_group_df()
+ existing_records = main_fg_df[main_fg_df["name"] == fg_name]
+
+ if existing_records.shape[0] > 0:
+ error_code = MessageCodes.EFS_DELETE_BEFORE_ARCHIVE
+ error_msg = Messages.get_message(
+ error_code,
+ 'FeatureGroup',
+ fg_name,
+ 'feature_group')
+ raise TeradataMlException(error_msg, error_code)
+
  # Remove data for FeatureGroup.
  _delete_data(table_name=self.__table_names["group_features_staging"],
  schema_name=self.__repo,
- delete_conditions=(Col("group_name") == fg_name)
+ delete_conditions=(Col("group_name") == fg_name) &
+ (Col("group_data_domain") == self.__data_domain)
  )
 
  res = _delete_data(table_name=self.__table_names["feature_group_staging"],
  schema_name=self.__repo,
- delete_conditions=(Col("name") == fg_name)
+ delete_conditions=(Col("name") == fg_name) &
+ (Col("data_domain") == self.__data_domain)
  )
 
  if res == 1:
  print("FeatureGroup '{}' is deleted.".format(fg_name))
  return True
 
- print("FeatureGroup '{}' not exist to delete.".format(fg_name))
+ print("FeatureGroup '{}' does not exist to delete.".format(fg_name))
  return False
 
- def __get_obj_df(self, obj_type):
+ @property
+ def version(self):
  """
  DESCRIPTION:
- Internal method to return either Features DataFrame OR Entity DataFrame
- OR DataSource DataFrame OR FeatureGroup DataFrame.
+ Get the FeatureStore version.
 
  PARAMETERS:
- obj_type
- Required Argument.
- Specifies the type of DataFrame to return.
- Allowed Values:
- * feature
- * feature_group
- * entity
- * data_source
- * group_features
+ None
 
  RETURNS:
- teradataml DataFrame.
+ str
 
  RAISES:
  None
 
  EXAMPLES:
- fs.__get_features_df()
- """
- if obj_type not in self.__df_container:
- from teradataml.dataframe.dataframe import in_schema
-
- # For feature or feature_staging, join it with xref table
- # so group name appears while listing features.
- map_ = {"feature": "group_features", "feature_staging": "group_features_staging"}
- if obj_type in map_:
- features = DataFrame(in_schema(self.__repo, self.__table_names[obj_type]))
- features_xref = DataFrame(in_schema(self.__repo, self.__table_names[map_[obj_type]])).select(
- ["feature_name", "group_name"])
- df = features.join(features_xref, on="name==feature_name", how='left')
- self.__df_container[obj_type] = df.select(features.columns+["group_name"])
- # For entity, join with xref table.
- elif obj_type == "entity" or obj_type == "entity_staging":
- ent_df = DataFrame(in_schema(self.__repo, self.__table_names[obj_type]))
- xref_df = DataFrame(in_schema(self.__repo, self.__table_names["{}_xref".format(obj_type)])).select(
- ['entity_name', 'entity_column'])
- df = ent_df.join(xref_df, on="name==entity_name", how="inner")
- self.__df_container[obj_type] = df.select(ent_df.columns+["entity_column"])
- else:
- self.__df_container[obj_type] = DataFrame(in_schema(self.__repo, self.__table_names[obj_type]))
+ # Example 1: Get the FeatureStore version for
+ # the repo 'vfs_v1'.
+ >>> from teradataml import FeatureStore
+ >>> fs = FeatureStore('vfs_v1')
+ FeatureStore is ready to use.
 
- return self.__df_container[obj_type]
+ # Get the version of FeatureStore.
+ >>> fs.version
+ '2.0.0'
+ """
+ if self.__version is None:
+ self.__version = self.__get_version()
+ return self.__version
 
- def version(self):
+ def list_feature_catalogs(self) -> DataFrame:
  """
  DESCRIPTION:
- Get the FeatureStore version.
+ Lists all the feature catalogs.
 
  PARAMETERS:
  None
 
  RETURNS:
- str
+ teradataml DataFrame
 
  RAISES:
  None
 
  EXAMPLES:
- # Example 1: Get the version of FeatureStore version for
- # the repo 'vfs_v1'.
+ # Example 1: List all the feature catalogs in the repo 'vfs_v1'.
  >>> from teradataml import FeatureStore
- >>> fs = FeatureStore('vfs_v1')
- >>> fs.version()
- '1.0.0'
- >>>
+
+ # Create FeatureStore for the repo 'vfs_v1' or use existing one.
+ >>> fs = FeatureStore("vfs_v1")
+ FeatureStore is ready to use.
+
+ # Load the sales data.
+ >>> load_example_data("dataframe", "sales")
+ >>> df = DataFrame("sales")
+
+ # Create a feature process.
+ >>> from teradataml import FeatureProcess
+ >>> fp = FeatureProcess(repo="vfs_v1",
+ ... data_domain='sales',
+ ... object=df,
+ ... entity="accounts",
+ ... features=["Jan", "Feb", "Mar", "Apr"])
+ >>> fp.run()
+ Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' started.
+ Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' completed.
+
+ # List all the feature catalogs in the repo 'vfs_v1'.
+ >>> fs.list_feature_catalogs()
+ data_domain feature_id table_name valid_start valid_end
+ entity_name
+ accounts sales 2 FS_T_918e1cb4_c6bc_6d38_634d_7b9fe53e2a63 2025-06-16 16:02:49.481245+00: 9999-12-31 23:59:59.999999+00:
+ accounts sales 100001 FS_T_e84ff803_3d5c_4793_cd72_251c780fffe4 2025-06-16 16:02:49.481245+00: 9999-12-31 23:59:59.999999+00:
+ accounts sales 1 FS_T_918e1cb4_c6bc_6d38_634d_7b9fe53e2a63 2025-06-16 16:02:49.481245+00: 9999-12-31 23:59:59.999999+00:
+ accounts sales 200001 FS_T_918e1cb4_c6bc_6d38_634d_7b9fe53e2a63 2025-06-16 16:02:49.481245+00: 9999-12-31 23:59:59.999999+00:
+ """
+ df = self.__get_without_valid_period_df(self.__get_features_metadata_df())
+ return df[df.data_domain==self.__data_domain]
+
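The valid_start/valid_end pair in the catalog listing above follows the usual temporal-table convention: a row is "current" while its valid_end is still the far-future sentinel, and archiving closes that window. A small illustrative sketch of reading such rows, assuming that convention:

    # Sketch only: assumes the 9999-12-31 sentinel marks an open validity period.
    from datetime import datetime

    SENTINEL = datetime(9999, 12, 31, 23, 59, 59)
    rows = [{"feature_id": 1, "valid_end": SENTINEL},
            {"feature_id": 2, "valid_end": datetime(2025, 7, 28)}]

    as_of = datetime(2026, 1, 1)
    current = [r["feature_id"] for r in rows if r["valid_end"] > as_of]
    print(current)  # [1] - feature 2's validity window was closed (archived)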
+ def archive_feature_process(self, process_id):
+ """
+ DESCRIPTION:
+ Archives the FeatureProcess with the given process_id.
+ Notes:
+ * Archived FeatureProcess is not available for any further processing.
+ * Archived FeatureProcess can be viewed using the
+ `FeatureStore.list_feature_processes(archived=True)` method.
+ * The same feature can be ingested by multiple processes. If a feature associated with
+ process "process_id" is also associated with other processes, then this
+ function only archives the feature values associated with the process "process_id". Else
+ it archives the feature from the feature catalog. Look at `FeatureCatalog.archive_features()`
+ for more details.
+
+ PARAMETERS:
+ process_id:
+ Required Argument.
+ Specifies the ID of the FeatureProcess to archive from repository.
+ Types: str
+
+ RETURNS:
+ bool
+
+ RAISES:
+ TeradataMLException, TypeError, ValueError
+
+ EXAMPLES:
+ >>> load_example_data('dataframe', ['sales'])
+ # Create a teradataml DataFrame.
+ >>> from teradataml import DataFrame, FeatureProcess, FeatureStore
+ >>> df = DataFrame("sales")
+
+ # Create FeatureStore for repo 'repo'.
+ >>> fs = FeatureStore("repo", data_domain='sales')
+ Repo repo does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+ # Setup FeatureStore for this repository.
+ >>> fs.setup()
+ True
+
+ # Run FeatureProcess to ingest features.
+ >>> from teradataml import FeatureProcess
+ >>> fp = FeatureProcess(repo='repo',
+ ... data_domain='sales',
+ ... object=df,
+ ... entity='accounts',
+ ... features=['Jan', 'Feb', 'Mar', 'Apr'])
+ >>> fp.run()
+ Process '2a014f2d-6b71-11f0-aeda-f020ffe7fe09' started.
+ Process '2a014f2d-6b71-11f0-aeda-f020ffe7fe09' completed.
+
+ # List the available FeatureProcesses.
+ >>> fs.list_feature_processes()
+ description data_domain process_type data_source entity_id feature_names feature_ids valid_start valid_end
+ process_id
+ 2a014f2d-6b71-11f0-aeda-f020ffe7fe09 sales denormalized view "sales" accounts Apr, Feb, Jan, Mar None 2025-07-28 05:10:34.760000+00: 9999-12-31 23:59:59.999999+00:
+
+ # Example: Archive the FeatureProcess with process_id '2a014f2d-6b71-11f0-aeda-f020ffe7fe09'.
+ >>> fs.archive_feature_process("2a014f2d-6b71-11f0-aeda-f020ffe7fe09")
+ Feature 'Jan' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
+ Feature 'Jan' is archived from metadata.
+ Feature 'Feb' is archived from table 'FS_T_6003dc24_375e_7fd6_46f0_eeb868305c4a'.
+ Feature 'Feb' is archived from metadata.
+ Feature 'Mar' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
+ Feature 'Mar' is archived from metadata.
+ Feature 'Apr' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
+ Feature 'Apr' is archived from metadata.
+ FeatureProcess with process id '2a014f2d-6b71-11f0-aeda-f020ffe7fe09' is archived.
+ True
  """
- return self.__version
+ argument_validation_params = []
+ argument_validation_params.append(["process_id", process_id, True, str, True])
+
+ # Validate argument types
+ _Validators._validate_function_arguments(argument_validation_params)
+
+ features = self.__validate_feature_process(process_id)
+ if features is False:
+ return False
+
+ feature_details = FeatureCatalog._get_feature_details(
+ self.__repo, self.__data_domain, features)
+
+ # Get the shared features.
+ shared_features = FeatureCatalog._get_shared_features(self.__repo, self.__data_domain)
+
+ # Remove the features from the feature metadata table.
+ return self.__remove_feature_process(
+ process_id, features, feature_details, shared_features)
+
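The shared-feature rule in the notes above deserves a concrete illustration: per-process values are always removed, but a feature only leaves the catalog when no other process still ingests it. A hypothetical sketch of that decision, with a dict standing in for the shared-features bookkeeping:

    # Sketch only: maps each feature to the set of processes ingesting it.
    feature_to_processes = {"Jan": {"p1"}, "Feb": {"p1", "p2"}}

    def archive_process(process_id):
        for feature, procs in feature_to_processes.items():
            procs.discard(process_id)
            if procs:
                # Feature is shared: archive only this process's values.
                print("'{}': archived values for {} only".format(feature, process_id))
            else:
                # No other process uses it: archive it from the catalog.
                print("'{}': archived from the catalog".format(feature))

    archive_process("p1")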
+ def delete_feature_process(self, process_id):
+ """
+ DESCRIPTION:
+ Deletes the archived feature process from the feature store with the given process_id.
+ Notes:
+ * One feature can be ingested by multiple processes. If a feature associated with
+ process "process_id" is also ingested by other processes, then the "delete_feature_process()"
+ function only deletes the feature values associated with the process "process_id". Else
+ it deletes the feature from the feature catalog. Look at 'FeatureCatalog.delete_features()'
+ for more details.
+
+ PARAMETERS:
+ process_id:
+ Required Argument.
+ Specifies the ID of the FeatureProcess to delete from repository.
+ Types: str
+
+ RETURNS:
+ bool
+
+ RAISES:
+ TeradataMLException, TypeError, ValueError
+
+ EXAMPLES:
+ >>> load_example_data('dataframe', ['sales'])
+ # Create a teradataml DataFrame.
+ >>> from teradataml import DataFrame, FeatureProcess, FeatureStore
+ >>> df = DataFrame("sales")
+
+ # Create FeatureStore for repo 'repo'.
+ >>> fs = FeatureStore("repo", data_domain='sales')
+ Repo repo does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+ # Setup FeatureStore for this repository.
+ >>> fs.setup()
+ True
+
+ # Run FeatureProcess to ingest features.
+ >>> from teradataml import FeatureProcess
+ >>> fp = FeatureProcess(repo='repo',
+ ... data_domain='sales',
+ ... object=df,
+ ... entity='accounts',
+ ... features=['Jan', 'Feb', 'Mar', 'Apr'])
+ >>> fp.run()
+ Process '2a014f2d-6b71-11f0-aeda-f020ffe7fe09' started.
+ Process '2a014f2d-6b71-11f0-aeda-f020ffe7fe09' completed.
+
+ # List the available FeatureProcesses.
+ >>> fs.list_feature_processes()
+ description data_domain process_type data_source entity_id feature_names feature_ids valid_start valid_end
+ process_id
+ 2a014f2d-6b71-11f0-aeda-f020ffe7fe09 sales denormalized view "sales" accounts Apr, Feb, Jan, Mar None 2025-07-28 05:10:34.760000+00: 9999-12-31 23:59:59.999999+00:
+
+ # Example: Archive the FeatureProcess with process_id '2a014f2d-6b71-11f0-aeda-f020ffe7fe09'.
+ >>> fs.archive_feature_process("2a014f2d-6b71-11f0-aeda-f020ffe7fe09")
+ Feature 'Jan' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
+ Feature 'Jan' is archived from metadata.
+ Feature 'Feb' is archived from table 'FS_T_6003dc24_375e_7fd6_46f0_eeb868305c4a'.
+ Feature 'Feb' is archived from metadata.
+ Feature 'Mar' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
+ Feature 'Mar' is archived from metadata.
+ Feature 'Apr' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
+ Feature 'Apr' is archived from metadata.
+ FeatureProcess with process id '2a014f2d-6b71-11f0-aeda-f020ffe7fe09' is archived.
+ True
+
+ # Example: Delete the FeatureProcess with process_id '2a014f2d-6b71-11f0-aeda-f020ffe7fe09'.
+ >>> fs.delete_feature_process('2a014f2d-6b71-11f0-aeda-f020ffe7fe09')
+ Feature 'Feb' deleted successfully from table 'FS_T_e84ff803_3d5c_4793_cd72_251c780fffe4'.
+ Feature 'Jan' deleted successfully from table 'FS_T_918e1cb4_c6bc_6d38_634d_7b9fe53e2a63'.
+ Feature 'Mar' deleted successfully from table 'FS_T_918e1cb4_c6bc_6d38_634d_7b9fe53e2a63'.
+ Feature 'Apr' deleted successfully from table 'FS_T_918e1cb4_c6bc_6d38_634d_7b9fe53e2a63'.
+ FeatureProcess with process_id '2a014f2d-6b71-11f0-aeda-f020ffe7fe09' is deleted.
+ True
+
+ # List the available FeatureProcesses after delete.
+ >>> fs.list_feature_processes()
+ Empty DataFrame
+ Columns: [description, data_domain, process_type, data_source, entity_id, feature_names, feature_ids, valid_start, valid_end]
+ Index: []
+ """
+ argument_validation_params = []
+ argument_validation_params.append(["process_id", process_id, True, str, True])
+
+ # Validate argument types
+ _Validators._validate_function_arguments(argument_validation_params)
+
+ # Before deleting, check if the specified process id exists or not.
+ features = self.__validate_feature_process(process_id, type_='delete')
+ if features is False:
+ return False
+
+ feature_details = FeatureCatalog._get_feature_details(
+ self.__repo, self.__data_domain, features)
+
+ # Get the shared features.
+ shared_features = FeatureCatalog._get_shared_features(self.__repo, self.__data_domain)
+
+ return self.__remove_feature_process(
+ process_id, features, feature_details, shared_features, type_='delete')
+
+ @db_transaction
+ def __remove_feature_process(self,
+ process_id,
+ process_features,
+ feature_details,
+ shared_features,
+ type_='archive'):
+ """
+ DESCRIPTION:
+ Internal function to remove the FeatureProcess from repository.
+ It also removes the associated features from the feature table.
+
+ PARAMETERS:
+ process_id:
+ Required Argument.
+ Specifies the ID of the FeatureProcess to remove from repository.
+ Types: str
+
+ feature_details:
+ Required Argument.
+ Specifies the list of features to remove from repository.
+ Types: list of namedtuple
+
+ type_:
+ Optional Argument.
+ Specifies the type of removal. Allowed values are 'archive' and 'delete'.
+ Default value is 'archive'.
+ Types: str
+
+ RETURNS:
+ bool
+
+ RAISES:
+ None
+
+ EXAMPLES:
+ >>> self.__remove_feature_process("5747082b-4acb-11f0-a2d7-f020ffe7fe09",
+ ... process_features=[namedtuple('feature_', ['name', 'id', 'table_name'])('sales_data_Feb', 1, 'FS_T_12345')],
+ ... type_='archive')
+ """
+ temporal_clause = 'CURRENT VALIDTIME'
+ delete_condition = (Col("process_id") == process_id)
+ if type_ == 'delete':
+ temporal_clause = None
+
+ fc = FeatureCatalog(self.__repo, self.__data_domain)
+ res1 = fc._remove_features(process_features, feature_details, type_=='archive', shared_features, process_id)
+
+ # Remove it from feature process table.
+ res = _delete_data(table_name=self.__table_names["feature_process"],
+ schema_name=self.__repo,
+ delete_conditions=delete_condition,
+ temporal_clause=temporal_clause
+ )
+
+ if res >= 1:
+ print("FeatureProcess with process id '{}' is {}d.".format(process_id, type_))
+ return res1 & True
+
+ print("FeatureProcess with process id '{}' does not exist to {}.".format(process_id, type_))
+ return res1 & False
+
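Note the split above: an archive runs the delete under a CURRENT VALIDTIME temporal clause (closing the row's validity period), while a delete passes no temporal clause and physically removes rows. A sketch of that distinction under the same temporal-table assumption, with dict rows standing in for the feature-process table:

    # Sketch only: "archive" closes the open validity period, "delete" removes rows.
    from datetime import datetime

    rows = [{"process_id": "p1", "valid_end": datetime(9999, 12, 31)}]

    def remove(rows, process_id, type_="archive"):
        if type_ == "archive":
            for r in rows:
                if r["process_id"] == process_id:
                    r["valid_end"] = datetime.now()   # close the open period
            return rows
        # hard delete: drop the rows regardless of their validity period
        return [r for r in rows if r["process_id"] != process_id]

    print(remove(rows, "p1", type_="archive"))  # row kept, period closed
    print(remove(rows, "p1", type_="delete"))   # []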
+ def __validate_feature_process(self, process_id, type_='archive'):
+ """
+ DESCRIPTION:
+ Internal function to validate whether the feature process exists or not.
+ Also, the function checks whether the process is archived or not.
+
+ PARAMETERS:
+ process_id:
+ Required Argument.
+ Specifies the ID of the FeatureProcess to validate.
+ Types: str
+
+ type_:
+ Optional Argument.
+ Specifies the type of validation. Allowed values are 'archive' and 'delete'.
+ Default value is 'archive'.
+ Types: str
+
+ RETURNS:
+ list or bool.
+ False if the process does not exist, or if its archive state does not
+ allow the requested action.
+ list if all validations pass.
+
+ RAISES:
+ TeradataMlException
+
4143
+ EXAMPLES:
4144
+ >>> # Validate the feature process with process_id '5747082b-4acb-11f0-a2d7-f020ffe7fe09'.
4145
+ >>> fs.__validate_feature_process(process_id='5747082b-4acb-11f0-a2d7-f020ffe7fe09')
4146
+ (['sales_data_Feb', 'sales_data_Jan'], ['sales_data_Mar', 'sales_data_Apr'])
4147
+ """
4148
+ # Extract process type, data source, entity_id, feature_names from given process id.
4149
+ sql = EFS_ARCHIVED_RECORDS.format("feature_names",
4150
+ '"{}"."{}"'.format(self.__repo,
4151
+ self.__table_names["feature_process"]),
4152
+ "PROCESS_ID = '{}' AND DATA_DOMAIN = '{}'".
4153
+ format(process_id, self.__data_domain))
4154
+
4155
+ feature_names = set()
4156
+ all_archived = True
4157
+ any_one_not_archived = False
4158
+ for rec in execute_sql(sql):
4159
+ is_archived = rec[1] == 1
4160
+ all_archived = all_archived and is_archived
4161
+ any_one_not_archived = any_one_not_archived or (not is_archived)
4162
+ feature_names.update([f.strip() for f in rec[0].split(",")])
4163
+
4164
+ # Not raising error to align with the behavior of other methods.
4165
+ if not feature_names:
4166
+ print("FeatureProcess with process id '{}' does not exist.".format(process_id))
4167
+ return False
4168
+
4169
+ # Check if feature is already archived or not.
4170
+ if type_ == 'archive' and all_archived:
4171
+ # All records valid end date should be less than current timestamp in such case.
4172
+ print("FeatureProcess with process id '{}' is already archived.".format(process_id))
4173
+ return False
4174
+
4175
+ # For delete, check if the process is archived or not first.
4176
+ if type_ == 'delete' and any_one_not_archived:
4177
+ print("FeatureProcess with process id '{}' is not archived. "
4178
+ "First archive the process and then delete it.".format(process_id))
4179
+ return False
4180
+
4181
+ # Check if feature is associated with any dataset or not.
4182
+ dataset_features_df = self.__get_dataset_features_df()
4183
+ # Validate the feature names.
4184
+ _Validators._validate_features_not_in_efs_dataset(
4185
+ df=dataset_features_df[(dataset_features_df['data_domain'] == self.__data_domain)],
4186
+ feature_names=list(feature_names),
4187
+ action='archived')
4188
+
4189
+ return feature_names
4190
+
4191
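A small illustration of the flag folding above, with made-up records of the form (feature_names_csv, archived_flag): archiving is refused only when every record is already archived, and deleting is refused while any record is still active.

    records = [("sales_data_Feb, sales_data_Jan", 1), ("sales_data_Mar", 0)]
    all_archived = all(rec[1] == 1 for rec in records)            # False
    any_one_not_archived = any(rec[1] != 1 for rec in records)    # True
    feature_names = {f.strip() for rec in records for f in rec[0].split(",")}
    # {'sales_data_Feb', 'sales_data_Jan', 'sales_data_Mar'}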
+    def remove_data_domain(self):
+        """
+        DESCRIPTION:
+            Removes the data domain from the FeatureStore and all associated objects.
+
+            Notes:
+                * This operation permanently deletes all objects, tables, and views tied to the data domain.
+                * There is no archival or built-in recovery; all deletions are irreversible.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            bool
+
+        RAISES:
+            TeradataMLException
+
+        EXAMPLES:
+            >>> from teradataml import FeatureStore
+            # Create a new FeatureStore or use an existing one.
+            >>> fs = FeatureStore("repo", data_domain="sales")
+            FeatureStore is ready to use.
+
+            # Remove the data domain 'sales' and all associated objects.
+            >>> fs.remove_data_domain()
+            The function will remove the data domain 'sales' and all associated objects. Are you sure you want to proceed? (Y/N): Y
+            Data domain 'sales' is removed from the FeatureStore.
+            True
+        """
+        confirmation = input("The function will remove the data domain '{}' and"
+                             " all associated objects. Are you sure you want to proceed? (Y/N): ".format(self.__data_domain))
+
+        if confirmation not in ["Y", "y"]:
+            return False
+
+        # Get the views to drop that are related to the data domain.
+        dataset_features_df = self.__get_dataset_features_df()
+        filtered_dataset_features_df = dataset_features_df[dataset_features_df['data_domain'] == self.__data_domain].itertuples()
+        views_to_drop = list({rec.feature_view for rec in filtered_dataset_features_df})
+
+        # Get the tables to drop that are related to the data domain.
+        features_metadata_df = self.__get_features_metadata_df()
+        filtered_features_metadata_df = features_metadata_df[features_metadata_df['data_domain'] == self.__data_domain].itertuples()
+        tables_to_drop = list({rec.table_name for rec in filtered_features_metadata_df})
+
+        res = db_transaction(self.__remove_data_domain)()
+
+        # Drop the views related to the data domain.
+        for view in views_to_drop:
+            try:
+                execute_sql(f"DROP VIEW {_get_quoted_object_name(schema_name=self.__repo, object_name=view)}")
+            except Exception as e:
+                print(f"Error dropping view {view}: {e}")
+        # Drop the tables related to the data domain.
+        for table in tables_to_drop:
+            try:
+                execute_sql(f"DROP TABLE {_get_quoted_object_name(schema_name=self.__repo, object_name=table)}")
+            except Exception as e:
+                print(f"Error dropping table {table}: {e}")
+
+        return True
+
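One design point worth noting: the metadata DELETEs run inside a single db_transaction, while the DROP VIEW/DROP TABLE statements run best-effort afterwards, since DDL cannot roll back together with the data changes. A minimal sketch of the pattern, with illustrative names and a hardcoded 'repo' schema:

    def _sketch_remove_domain(run_deletes_in_txn, execute_sql, views, tables):
        run_deletes_in_txn()  # all metadata DELETEs succeed or none do
        # DDL is applied one object at a time; a failure is reported, not fatal.
        for name, kind in [(v, "VIEW") for v in views] + [(t, "TABLE") for t in tables]:
            try:
                execute_sql('DROP {} "repo"."{}"'.format(kind, name))
            except Exception as err:
                print("Error dropping {} {}: {}".format(kind.lower(), name, err))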
+    def __remove_data_domain(self):
+        """
+        DESCRIPTION:
+            Internal method to remove the data domain from the FeatureStore and all associated objects.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            bool
+
+        RAISES:
+            TeradataMLException
+
+        EXAMPLES:
+            >>> fs.__remove_data_domain()
+        """
+        # To remove a data domain from the FeatureStore, we need to:
+        # 1. Remove data domain entries from the dataset catalog and dataset features.
+        # 2. Remove data domain entries from the feature metadata.
+        # 3. Remove data domain entries from the feature processes.
+        # 4. Remove data domain entries from feature groups, group features, and their staging tables.
+        # 5. Remove data domain entries from features and their staging tables.
+        # 6. Remove data domain entries from entities, entity xref, and their staging tables.
+        # 7. Remove data domain entries from data sources and their staging tables.
+        # 8. Remove data domain entries from the data_domain table.
+
+        # 1. Remove data domain entries from the dataset catalog and dataset features.
+        _delete_data(
+            table_name=self.__table_names['dataset_catalog'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.__data_domain)
+        )
+
+        _delete_data(
+            table_name=self.__table_names['dataset_features'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.__data_domain)
+        )
+
+        # 2. Remove data domain entries from the feature metadata.
+        _delete_data(
+            table_name=self.__table_names['feature_metadata'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.__data_domain)
+        )
+
+        # 3. Remove data domain entries from the feature processes.
+        _delete_data(
+            table_name=self.__table_names['feature_process'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.__data_domain)
+        )
+
+        # 4. Remove data domain entries from feature groups, group features, and their staging tables.
+        _delete_data(
+            table_name=self.__table_names['group_features'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("group_data_domain") == self.__data_domain)
+        )
+        _delete_data(
+            table_name=self.__table_names['feature_group'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.__data_domain)
+        )
+
+        _delete_data(
+            table_name=self.__table_names["group_features_staging"],
+            schema_name=self.__repo,
+            delete_conditions=(Col("group_data_domain") == self.__data_domain))
+
+        _delete_data(
+            table_name=self.__table_names["feature_group_staging"],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.__data_domain)
+        )
+
+        # 5. Remove data domain entries from features and their staging tables.
+        _delete_data(
+            table_name=self.__table_names['feature'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.__data_domain)
+        )
+
+        _delete_data(
+            table_name=self.__table_names['feature_staging'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.__data_domain)
+        )
+
+        # 6. Remove data domain entries from entities, entity xref, and their staging tables.
+        _delete_data(
+            table_name=self.__table_names['entity_xref'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.__data_domain)
+        )
+        _delete_data(
+            table_name=self.__table_names['entity'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.__data_domain)
+        )
+
+        _delete_data(
+            table_name=self.__table_names['entity_staging'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.__data_domain)
+        )
+
+        _delete_data(
+            table_name=self.__table_names['entity_staging_xref'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.__data_domain)
+        )
+
+        # 7. Remove data domain entries from data sources and their staging tables.
+        _delete_data(
+            table_name=self.__table_names['data_source'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.__data_domain)
+        )
+
+        _delete_data(
+            table_name=self.__table_names['data_source_staging'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.__data_domain)
+        )
+
+        # 8. Remove data domain entries from the data_domain table.
+        _delete_data(
+            table_name=self.__table_names['data_domain'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("name") == self.__data_domain)
+        )
+
+        print(f"Data domain '{self.__data_domain}' is removed from the FeatureStore.")
+        return True
+
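The eight steps are regular enough to express as a table-driven loop. The following is an editorial sketch under the assumption that Col and _delete_data behave as used above, not the shipped implementation; note the data_domain table's 'name' column is the one exception to the column pattern.

    DOMAIN_TABLES = [
        ('dataset_catalog', 'data_domain'), ('dataset_features', 'data_domain'),
        ('feature_metadata', 'data_domain'), ('feature_process', 'data_domain'),
        ('group_features', 'group_data_domain'), ('feature_group', 'data_domain'),
        ('group_features_staging', 'group_data_domain'),
        ('feature_group_staging', 'data_domain'),
        ('feature', 'data_domain'), ('feature_staging', 'data_domain'),
        ('entity_xref', 'data_domain'), ('entity', 'data_domain'),
        ('entity_staging', 'data_domain'), ('entity_staging_xref', 'data_domain'),
        ('data_source', 'data_domain'), ('data_source_staging', 'data_domain'),
        ('data_domain', 'name'),
    ]

    def _sketch_purge_domain(table_names, repo, domain):
        # Same order as the numbered steps: children first, the data_domain row last.
        for key, column in DOMAIN_TABLES:
            _delete_data(table_name=table_names[key],
                         schema_name=repo,
                         delete_conditions=(Col(column) == domain))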
+    def mind_map(self, feature_process=None):
+        """
+        DESCRIPTION:
+            Returns a visual mind map of the FeatureStore, showing data sources,
+            feature processes, feature catalog, and dataset catalog, with dependencies
+            illustrated by curves.
+            Note:
+                Works only in Jupyter Notebook or similar environments that support HTML rendering.
+
+        PARAMETERS:
+            feature_process:
+                Optional Argument.
+                Specifies the feature process to filter the mind map. When specified,
+                only the feature process and its related data sources, features, and datasets
+                are displayed.
+                Notes:
+                    * For datasets too, mind_map() displays only the features associated with
+                      the specified feature process. For example, if a Dataset is associated
+                      with Feature1 and Feature2, where Feature1 is ingested by FeatureProcess1
+                      and Feature2 by FeatureProcess2, then mind_map() displays the Dataset
+                      with Feature1 only if "feature_process" is set to FeatureProcess1.
+                    * If "feature_process" is not specified, then mind_map() displays all the
+                      feature processes, data sources, features, and datasets in the FeatureStore.
+                Types: str OR list of str
+
+        RETURNS:
+            None (displays HTML visualization)
+
+        RAISES:
+            TypeError
+
+        EXAMPLES:
+            # Set up the DataFrames and feature processes used in the examples below.
+            >>> from teradataml import DataFrame, FeatureStore, load_example_data
+            >>> load_example_data("dataframe", "sales")
+            # Create DataFrames.
+            >>> sales_df = DataFrame("sales")
+            >>> admissions_df = DataFrame("admissions")
+
+            # Create a FeatureStore for the repo 'vfs_v1'.
+            >>> fs = FeatureStore("vfs_v1", data_domain='Analytics')
+            FeatureStore is ready to use.
+
+            # Create a feature process to ingest the sales DataFrame.
+            >>> fp1 = fs.get_feature_process(object=sales_df,
+            ...                              features=['Jan', 'Feb', 'Mar', 'Apr'],
+            ...                              entity='accounts')
+            >>> fp1.run()
+            Process '7b9f76d6-562c-11f0-bb98-c934b24a960f' started.
+            Process '7b9f76d6-562c-11f0-bb98-c934b24a960f' completed.
+            True
+
+            # Create a feature process to ingest the admissions DataFrame.
+            >>> fp2 = fs.get_feature_process(object=admissions_df,
+            ...                              features=['masters', 'gpa', 'stats', 'programming', 'admitted'],
+            ...                              entity='id')
+            >>> fp2.run()
+            Process 'a5de0230-6b8e-11f0-ae70-f020ffe7fe09' started.
+            Process 'a5de0230-6b8e-11f0-ae70-f020ffe7fe09' completed.
+
+            # Example 1: Display the mind map of the FeatureStore with all feature processes.
+            >>> fs.mind_map()
+
+            # Example 2: Display the mind map of the FeatureStore for the sales feature process.
+            >>> fs.mind_map(feature_process=fp1.process_id)
+
+            # Example 3: Display the mind map of the FeatureStore for admissions features.
+            >>> fs.mind_map(feature_process=fp2.process_id)
+
+            # Example 4: Display the mind map of the FeatureStore for both the sales and admissions
+            # feature processes.
+            >>> fs.mind_map(feature_process=[fp1.process_id, fp2.process_id])
+        """
+        # Validate arguments.
+        argument_validation_params = []
+        argument_validation_params.append(["feature_process", feature_process, True, (str, list), True])
+
+        # Validate argument types.
+        _Validators._validate_function_arguments(argument_validation_params)
+
+        # 1. Declare Python variables for the mind map.
+        data_sources_ = set()
+        feature_processes_ = set()
+        features_ = set()
+        datasets_ = set()
+        data_source_map = {}
+        feature_process_map = {}
+        dataset_feature_map = {}
+
+        sql = """
+        select distinct process_id, oreplace(data_source, '"', '') as data_source, feature_names from "{}".{}
+        where data_domain = '{}'
+        """.format(self.__repo, EFS_DB_COMPONENTS['feature_process'], self.__data_domain)
+
+        # If the user provides a feature process, filter the SQL query.
+        if feature_process:
+            feature_process = UtilFuncs._as_list(feature_process)
+            feature_process_str = ', '.join(f"'{fp}'" for fp in feature_process)
+            sql += " and process_id in ({})".format(feature_process_str)
+
+        recs = execute_sql(sql)
+        for rec in recs:
+            process_id, data_source, feature_names = rec
+            data_sources_.add(data_source)
+            feature_processes_.add(process_id)
+            feature_names = [f.strip() for f in feature_names.split(',')]
+            features_.update(feature_names)
+
+            # Populate the maps.
+            if data_source not in data_source_map:
+                data_source_map[data_source] = []
+            data_source_map[data_source].append(process_id)
+
+            if process_id not in feature_process_map:
+                feature_process_map[process_id] = []
+            feature_process_map[process_id].extend(feature_names)
+
+        # The feature process map can have duplicates; de-duplicate the values.
+        feature_process_map = {k: list(set(v)) for k, v in feature_process_map.items()}
+
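Shape of the maps built above, reproduced with made-up records of the form (process_id, data_source, feature_names_csv): each data source fans out to process ids, and each process id to its de-duplicated feature names.

    recs = [("fp1", "sales", "Jan, Feb"), ("fp1", "sales", "Feb, Mar")]
    data_source_map, feature_process_map = {}, {}
    for process_id, data_source, feature_csv in recs:
        data_source_map.setdefault(data_source, []).append(process_id)
        feature_process_map.setdefault(process_id, []).extend(
            f.strip() for f in feature_csv.split(","))
    feature_process_map = {k: list(set(v)) for k, v in feature_process_map.items()}
    # data_source_map     -> {'sales': ['fp1', 'fp1']}
    # feature_process_map -> {'fp1': ['Jan', 'Feb', 'Mar']} (order may vary)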
+        data_sources = [{"id": ds, "label": ds} for ds in data_sources_]
+        feature_processes = [{"id": fp, "label": fp} for fp in feature_processes_]
+        features = [{"id": f, "label": f} for f in features_]
+
+        # Create datasets and dataset_feature_map.
+        ds_sql = """
+        select feature_view, feature_name from
+        "{}".{}
+        where data_domain = '{}'
+        """.format(self.__repo, EFS_DB_COMPONENTS['dataset_features'], self.__data_domain)
+
+        # If the user provides a specific feature process, then show only those features in datasets.
+        if feature_process:
+            fp_str = ', '.join(f"'{fp}'" for fp in feature_process)
+            ds_sql += " and feature_version IN ({})".format(fp_str)
+
+        recs = execute_sql(ds_sql)
+        for rec in recs:
+            feature_view, feature_name = rec
+            datasets_.add(feature_view)
+            if feature_view not in dataset_feature_map:
+                dataset_feature_map[feature_view] = []
+            dataset_feature_map[feature_view].append(feature_name)
+
+        datasets = [{"id": ds, "label": ds} for ds in datasets_]
+
+        # 2. Add a unique suffix to all ids in the variables.
+        from time import time as epoch_seconds
+        suffix = f"_fs_{str(epoch_seconds()).replace('.', '_')}"
+
+        def add_suffix_to_list(lst):
+            return [dict(obj, id=obj["id"] + suffix) for obj in lst]
+
+        def add_suffix_to_dict_keys_and_values(dct):
+            return {k + suffix: [v + suffix for v in vs] for k, vs in dct.items()}
+
+        data_sources_js = add_suffix_to_list(data_sources)
+        feature_processes_js = add_suffix_to_list([obj for obj in feature_processes if not obj.get("invisible")])
+        # Keep invisible objects for completeness in features, but filter for display if needed.
+        features_js = add_suffix_to_list(features)
+        datasets_js = add_suffix_to_list(datasets)
+        data_source_map_js = add_suffix_to_dict_keys_and_values(data_source_map)
+        feature_process_map_js = add_suffix_to_dict_keys_and_values(feature_process_map)
+        dataset_feature_map_js = add_suffix_to_dict_keys_and_values(dataset_feature_map)
+
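The suffix makes every DOM/JS id unique per render, so two mind maps in one notebook cannot collide. Effect of the helpers above, with a fixed suffix for clarity (values are made up):

    suffix = "_fs_1700000000_0"
    nodes = [{"id": "sales", "label": "sales"}]
    edges = {"sales": ["fp1"]}
    nodes_js = [dict(obj, id=obj["id"] + suffix) for obj in nodes]
    edges_js = {k + suffix: [v + suffix for v in vs] for k, vs in edges.items()}
    # nodes_js -> [{'id': 'sales_fs_1700000000_0', 'label': 'sales'}]
    # edges_js -> {'sales_fs_1700000000_0': ['fp1_fs_1700000000_0']}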
+        # 3. Prepare the JS variable strings.
+        import json
+        js_data_sources = json.dumps(data_sources_js)
+        js_feature_processes = json.dumps(feature_processes_js)
+        js_features = json.dumps(features_js)
+        js_datasets = json.dumps(datasets_js)
+        js_data_source_map = json.dumps(data_source_map_js)
+        js_feature_process_map = json.dumps(feature_process_map_js)
+        js_dataset_feature_map = json.dumps(dataset_feature_map_js)
+
+        # 4. Get the current GMT timestamp for display.
+        from datetime import datetime, timezone
+        gmt_now = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S GMT')
+
+        # 5. Inject the JS variables, timestamp, and feature store name into the template.
+        html_ = _TD_FS_MindMap_Template\
+            .replace("__DATA_SOURCES__", js_data_sources) \
+            .replace("__FEATURE_PROCESSES__", js_feature_processes) \
+            .replace("__FEATURES__", js_features) \
+            .replace("__DATASETS__", js_datasets) \
+            .replace("__DATA_SOURCE_MAP__", js_data_source_map) \
+            .replace("__FEATURE_PROCESS_MAP__", js_feature_process_map) \
+            .replace("__DATASET_FEATURE_MAP__", js_dataset_feature_map) \
+            .replace("__MINDMAP_TIMESTAMP__", gmt_now) \
+            .replace("__REPO__", self.__repo)\
+            .replace("__DATA_DOMAIN__", self.__data_domain)
+
+        # 6. Add the unique suffix to all element IDs in the HTML/JS.
+        html_ = html_.replace("_fs_i", suffix)
+
+        from IPython.display import display, HTML
+        display(HTML(html_))
+
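A closing note on the template injection: json.dumps output is a valid JavaScript literal, which is why the dumped strings can be spliced into the HTML template verbatim. A tiny illustration (the variable name is made up):

    import json
    nodes = [{"id": "sales_fs_1", "label": "sales"}]
    print("var dataSources = {};".format(json.dumps(nodes)))
    # -> var dataSources = [{"id": "sales_fs_1", "label": "sales"}];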