teradataml-20.0.0.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1208)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +2762 -0
  4. teradataml/__init__.py +78 -0
  5. teradataml/_version.py +11 -0
  6. teradataml/analytics/Transformations.py +2996 -0
  7. teradataml/analytics/__init__.py +82 -0
  8. teradataml/analytics/analytic_function_executor.py +2416 -0
  9. teradataml/analytics/analytic_query_generator.py +1050 -0
  10. teradataml/analytics/byom/H2OPredict.py +514 -0
  11. teradataml/analytics/byom/PMMLPredict.py +437 -0
  12. teradataml/analytics/byom/__init__.py +16 -0
  13. teradataml/analytics/json_parser/__init__.py +133 -0
  14. teradataml/analytics/json_parser/analytic_functions_argument.py +1805 -0
  15. teradataml/analytics/json_parser/json_store.py +191 -0
  16. teradataml/analytics/json_parser/metadata.py +1666 -0
  17. teradataml/analytics/json_parser/utils.py +805 -0
  18. teradataml/analytics/meta_class.py +236 -0
  19. teradataml/analytics/sqle/DecisionTreePredict.py +456 -0
  20. teradataml/analytics/sqle/NaiveBayesPredict.py +420 -0
  21. teradataml/analytics/sqle/__init__.py +128 -0
  22. teradataml/analytics/sqle/json/decisiontreepredict_sqle.json +78 -0
  23. teradataml/analytics/sqle/json/naivebayespredict_sqle.json +62 -0
  24. teradataml/analytics/table_operator/__init__.py +11 -0
  25. teradataml/analytics/uaf/__init__.py +82 -0
  26. teradataml/analytics/utils.py +828 -0
  27. teradataml/analytics/valib.py +1617 -0
  28. teradataml/automl/__init__.py +5835 -0
  29. teradataml/automl/autodataprep/__init__.py +493 -0
  30. teradataml/automl/custom_json_utils.py +1625 -0
  31. teradataml/automl/data_preparation.py +1384 -0
  32. teradataml/automl/data_transformation.py +1254 -0
  33. teradataml/automl/feature_engineering.py +2273 -0
  34. teradataml/automl/feature_exploration.py +1873 -0
  35. teradataml/automl/model_evaluation.py +488 -0
  36. teradataml/automl/model_training.py +1407 -0
  37. teradataml/catalog/__init__.py +2 -0
  38. teradataml/catalog/byom.py +1759 -0
  39. teradataml/catalog/function_argument_mapper.py +859 -0
  40. teradataml/catalog/model_cataloging_utils.py +491 -0
  41. teradataml/clients/__init__.py +0 -0
  42. teradataml/clients/auth_client.py +137 -0
  43. teradataml/clients/keycloak_client.py +165 -0
  44. teradataml/clients/pkce_client.py +481 -0
  45. teradataml/common/__init__.py +1 -0
  46. teradataml/common/aed_utils.py +2078 -0
  47. teradataml/common/bulk_exposed_utils.py +113 -0
  48. teradataml/common/constants.py +1669 -0
  49. teradataml/common/deprecations.py +166 -0
  50. teradataml/common/exceptions.py +147 -0
  51. teradataml/common/formula.py +743 -0
  52. teradataml/common/garbagecollector.py +666 -0
  53. teradataml/common/logger.py +1261 -0
  54. teradataml/common/messagecodes.py +518 -0
  55. teradataml/common/messages.py +262 -0
  56. teradataml/common/pylogger.py +67 -0
  57. teradataml/common/sqlbundle.py +764 -0
  58. teradataml/common/td_coltype_code_to_tdtype.py +48 -0
  59. teradataml/common/utils.py +3166 -0
  60. teradataml/common/warnings.py +36 -0
  61. teradataml/common/wrapper_utils.py +625 -0
  62. teradataml/config/__init__.py +0 -0
  63. teradataml/config/dummy_file1.cfg +5 -0
  64. teradataml/config/dummy_file2.cfg +3 -0
  65. teradataml/config/sqlengine_alias_definitions_v1.0 +14 -0
  66. teradataml/config/sqlengine_alias_definitions_v1.1 +20 -0
  67. teradataml/config/sqlengine_alias_definitions_v1.3 +19 -0
  68. teradataml/context/__init__.py +0 -0
  69. teradataml/context/aed_context.py +223 -0
  70. teradataml/context/context.py +1462 -0
  71. teradataml/data/A_loan.csv +19 -0
  72. teradataml/data/BINARY_REALS_LEFT.csv +11 -0
  73. teradataml/data/BINARY_REALS_RIGHT.csv +11 -0
  74. teradataml/data/B_loan.csv +49 -0
  75. teradataml/data/BuoyData2.csv +17 -0
  76. teradataml/data/CONVOLVE2_COMPLEX_LEFT.csv +5 -0
  77. teradataml/data/CONVOLVE2_COMPLEX_RIGHT.csv +5 -0
  78. teradataml/data/Convolve2RealsLeft.csv +5 -0
  79. teradataml/data/Convolve2RealsRight.csv +5 -0
  80. teradataml/data/Convolve2ValidLeft.csv +11 -0
  81. teradataml/data/Convolve2ValidRight.csv +11 -0
  82. teradataml/data/DFFTConv_Real_8_8.csv +65 -0
  83. teradataml/data/Employee.csv +5 -0
  84. teradataml/data/Employee_Address.csv +4 -0
  85. teradataml/data/Employee_roles.csv +5 -0
  86. teradataml/data/JulesBelvezeDummyData.csv +100 -0
  87. teradataml/data/Mall_customer_data.csv +201 -0
  88. teradataml/data/Orders1_12mf.csv +25 -0
  89. teradataml/data/Pi_loan.csv +7 -0
  90. teradataml/data/SMOOTHED_DATA.csv +7 -0
  91. teradataml/data/TestDFFT8.csv +9 -0
  92. teradataml/data/TestRiver.csv +109 -0
  93. teradataml/data/Traindata.csv +28 -0
  94. teradataml/data/__init__.py +0 -0
  95. teradataml/data/acf.csv +17 -0
  96. teradataml/data/adaboost_example.json +34 -0
  97. teradataml/data/adaboostpredict_example.json +24 -0
  98. teradataml/data/additional_table.csv +11 -0
  99. teradataml/data/admissions_test.csv +21 -0
  100. teradataml/data/admissions_train.csv +41 -0
  101. teradataml/data/admissions_train_nulls.csv +41 -0
  102. teradataml/data/advertising.csv +201 -0
  103. teradataml/data/ageandheight.csv +13 -0
  104. teradataml/data/ageandpressure.csv +31 -0
  105. teradataml/data/amazon_reviews_25.csv +26 -0
  106. teradataml/data/antiselect_example.json +36 -0
  107. teradataml/data/antiselect_input.csv +8 -0
  108. teradataml/data/antiselect_input_mixed_case.csv +8 -0
  109. teradataml/data/applicant_external.csv +7 -0
  110. teradataml/data/applicant_reference.csv +7 -0
  111. teradataml/data/apriori_example.json +22 -0
  112. teradataml/data/arima_example.json +9 -0
  113. teradataml/data/assortedtext_input.csv +8 -0
  114. teradataml/data/attribution_example.json +34 -0
  115. teradataml/data/attribution_sample_table.csv +27 -0
  116. teradataml/data/attribution_sample_table1.csv +6 -0
  117. teradataml/data/attribution_sample_table2.csv +11 -0
  118. teradataml/data/bank_churn.csv +10001 -0
  119. teradataml/data/bank_marketing.csv +11163 -0
  120. teradataml/data/bank_web_clicks1.csv +43 -0
  121. teradataml/data/bank_web_clicks2.csv +91 -0
  122. teradataml/data/bank_web_url.csv +85 -0
  123. teradataml/data/barrier.csv +2 -0
  124. teradataml/data/barrier_new.csv +3 -0
  125. teradataml/data/betweenness_example.json +14 -0
  126. teradataml/data/bike_sharing.csv +732 -0
  127. teradataml/data/bin_breaks.csv +8 -0
  128. teradataml/data/bin_fit_ip.csv +4 -0
  129. teradataml/data/binary_complex_left.csv +11 -0
  130. teradataml/data/binary_complex_right.csv +11 -0
  131. teradataml/data/binary_matrix_complex_left.csv +21 -0
  132. teradataml/data/binary_matrix_complex_right.csv +21 -0
  133. teradataml/data/binary_matrix_real_left.csv +21 -0
  134. teradataml/data/binary_matrix_real_right.csv +21 -0
  135. teradataml/data/blood2ageandweight.csv +26 -0
  136. teradataml/data/bmi.csv +501 -0
  137. teradataml/data/boston.csv +507 -0
  138. teradataml/data/boston2cols.csv +721 -0
  139. teradataml/data/breast_cancer.csv +570 -0
  140. teradataml/data/buoydata_mix.csv +11 -0
  141. teradataml/data/burst_data.csv +5 -0
  142. teradataml/data/burst_example.json +21 -0
  143. teradataml/data/byom_example.json +34 -0
  144. teradataml/data/bytes_table.csv +4 -0
  145. teradataml/data/cal_housing_ex_raw.csv +70 -0
  146. teradataml/data/callers.csv +7 -0
  147. teradataml/data/calls.csv +10 -0
  148. teradataml/data/cars_hist.csv +33 -0
  149. teradataml/data/cat_table.csv +25 -0
  150. teradataml/data/ccm_example.json +32 -0
  151. teradataml/data/ccm_input.csv +91 -0
  152. teradataml/data/ccm_input2.csv +13 -0
  153. teradataml/data/ccmexample.csv +101 -0
  154. teradataml/data/ccmprepare_example.json +9 -0
  155. teradataml/data/ccmprepare_input.csv +91 -0
  156. teradataml/data/cfilter_example.json +12 -0
  157. teradataml/data/changepointdetection_example.json +18 -0
  158. teradataml/data/changepointdetectionrt_example.json +8 -0
  159. teradataml/data/chi_sq.csv +3 -0
  160. teradataml/data/churn_data.csv +14 -0
  161. teradataml/data/churn_emission.csv +35 -0
  162. teradataml/data/churn_initial.csv +3 -0
  163. teradataml/data/churn_state_transition.csv +5 -0
  164. teradataml/data/citedges_2.csv +745 -0
  165. teradataml/data/citvertices_2.csv +1210 -0
  166. teradataml/data/clicks2.csv +16 -0
  167. teradataml/data/clickstream.csv +13 -0
  168. teradataml/data/clickstream1.csv +11 -0
  169. teradataml/data/closeness_example.json +16 -0
  170. teradataml/data/complaints.csv +21 -0
  171. teradataml/data/complaints_mini.csv +3 -0
  172. teradataml/data/complaints_test_tokenized.csv +353 -0
  173. teradataml/data/complaints_testtoken.csv +224 -0
  174. teradataml/data/complaints_tokens_model.csv +348 -0
  175. teradataml/data/complaints_tokens_test.csv +353 -0
  176. teradataml/data/complaints_traintoken.csv +472 -0
  177. teradataml/data/computers_category.csv +1001 -0
  178. teradataml/data/computers_test1.csv +1252 -0
  179. teradataml/data/computers_train1.csv +5009 -0
  180. teradataml/data/computers_train1_clustered.csv +5009 -0
  181. teradataml/data/confusionmatrix_example.json +9 -0
  182. teradataml/data/conversion_event_table.csv +3 -0
  183. teradataml/data/corr_input.csv +17 -0
  184. teradataml/data/correlation_example.json +11 -0
  185. teradataml/data/covid_confirm_sd.csv +83 -0
  186. teradataml/data/coxhazardratio_example.json +39 -0
  187. teradataml/data/coxph_example.json +15 -0
  188. teradataml/data/coxsurvival_example.json +28 -0
  189. teradataml/data/cpt.csv +41 -0
  190. teradataml/data/credit_ex_merged.csv +45 -0
  191. teradataml/data/creditcard_data.csv +1001 -0
  192. teradataml/data/customer_loyalty.csv +301 -0
  193. teradataml/data/customer_loyalty_newseq.csv +31 -0
  194. teradataml/data/customer_segmentation_test.csv +2628 -0
  195. teradataml/data/customer_segmentation_train.csv +8069 -0
  196. teradataml/data/dataframe_example.json +173 -0
  197. teradataml/data/decisionforest_example.json +37 -0
  198. teradataml/data/decisionforestpredict_example.json +38 -0
  199. teradataml/data/decisiontree_example.json +21 -0
  200. teradataml/data/decisiontreepredict_example.json +45 -0
  201. teradataml/data/dfft2_size4_real.csv +17 -0
  202. teradataml/data/dfft2_test_matrix16.csv +17 -0
  203. teradataml/data/dfft2conv_real_4_4.csv +65 -0
  204. teradataml/data/diabetes.csv +443 -0
  205. teradataml/data/diabetes_test.csv +89 -0
  206. teradataml/data/dict_table.csv +5 -0
  207. teradataml/data/docperterm_table.csv +4 -0
  208. teradataml/data/docs/__init__.py +1 -0
  209. teradataml/data/docs/byom/__init__.py +0 -0
  210. teradataml/data/docs/byom/docs/DataRobotPredict.py +180 -0
  211. teradataml/data/docs/byom/docs/DataikuPredict.py +217 -0
  212. teradataml/data/docs/byom/docs/H2OPredict.py +325 -0
  213. teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
  214. teradataml/data/docs/byom/docs/ONNXPredict.py +283 -0
  215. teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
  216. teradataml/data/docs/byom/docs/PMMLPredict.py +278 -0
  217. teradataml/data/docs/byom/docs/__init__.py +0 -0
  218. teradataml/data/docs/sqle/__init__.py +0 -0
  219. teradataml/data/docs/sqle/docs_17_10/Antiselect.py +83 -0
  220. teradataml/data/docs/sqle/docs_17_10/Attribution.py +200 -0
  221. teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +172 -0
  222. teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +131 -0
  223. teradataml/data/docs/sqle/docs_17_10/CategoricalSummary.py +86 -0
  224. teradataml/data/docs/sqle/docs_17_10/ChiSq.py +90 -0
  225. teradataml/data/docs/sqle/docs_17_10/ColumnSummary.py +86 -0
  226. teradataml/data/docs/sqle/docs_17_10/ConvertTo.py +96 -0
  227. teradataml/data/docs/sqle/docs_17_10/DecisionForestPredict.py +139 -0
  228. teradataml/data/docs/sqle/docs_17_10/DecisionTreePredict.py +152 -0
  229. teradataml/data/docs/sqle/docs_17_10/FTest.py +161 -0
  230. teradataml/data/docs/sqle/docs_17_10/FillRowId.py +83 -0
  231. teradataml/data/docs/sqle/docs_17_10/Fit.py +88 -0
  232. teradataml/data/docs/sqle/docs_17_10/GLMPredict.py +144 -0
  233. teradataml/data/docs/sqle/docs_17_10/GetRowsWithMissingValues.py +85 -0
  234. teradataml/data/docs/sqle/docs_17_10/GetRowsWithoutMissingValues.py +82 -0
  235. teradataml/data/docs/sqle/docs_17_10/Histogram.py +165 -0
  236. teradataml/data/docs/sqle/docs_17_10/MovingAverage.py +134 -0
  237. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +209 -0
  238. teradataml/data/docs/sqle/docs_17_10/NPath.py +266 -0
  239. teradataml/data/docs/sqle/docs_17_10/NaiveBayesPredict.py +116 -0
  240. teradataml/data/docs/sqle/docs_17_10/NaiveBayesTextClassifierPredict.py +176 -0
  241. teradataml/data/docs/sqle/docs_17_10/NumApply.py +147 -0
  242. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +135 -0
  243. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +109 -0
  244. teradataml/data/docs/sqle/docs_17_10/OutlierFilterFit.py +166 -0
  245. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +105 -0
  246. teradataml/data/docs/sqle/docs_17_10/Pack.py +128 -0
  247. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesFit.py +112 -0
  248. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +102 -0
  249. teradataml/data/docs/sqle/docs_17_10/QQNorm.py +105 -0
  250. teradataml/data/docs/sqle/docs_17_10/RoundColumns.py +110 -0
  251. teradataml/data/docs/sqle/docs_17_10/RowNormalizeFit.py +118 -0
  252. teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +99 -0
  253. teradataml/data/docs/sqle/docs_17_10/SVMSparsePredict.py +153 -0
  254. teradataml/data/docs/sqle/docs_17_10/ScaleFit.py +197 -0
  255. teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +99 -0
  256. teradataml/data/docs/sqle/docs_17_10/Sessionize.py +114 -0
  257. teradataml/data/docs/sqle/docs_17_10/SimpleImputeFit.py +116 -0
  258. teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +98 -0
  259. teradataml/data/docs/sqle/docs_17_10/StrApply.py +187 -0
  260. teradataml/data/docs/sqle/docs_17_10/StringSimilarity.py +146 -0
  261. teradataml/data/docs/sqle/docs_17_10/Transform.py +105 -0
  262. teradataml/data/docs/sqle/docs_17_10/UnivariateStatistics.py +142 -0
  263. teradataml/data/docs/sqle/docs_17_10/Unpack.py +214 -0
  264. teradataml/data/docs/sqle/docs_17_10/WhichMax.py +83 -0
  265. teradataml/data/docs/sqle/docs_17_10/WhichMin.py +83 -0
  266. teradataml/data/docs/sqle/docs_17_10/ZTest.py +155 -0
  267. teradataml/data/docs/sqle/docs_17_10/__init__.py +0 -0
  268. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +186 -0
  269. teradataml/data/docs/sqle/docs_17_20/Antiselect.py +83 -0
  270. teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
  271. teradataml/data/docs/sqle/docs_17_20/Attribution.py +201 -0
  272. teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +172 -0
  273. teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +139 -0
  274. teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
  275. teradataml/data/docs/sqle/docs_17_20/CategoricalSummary.py +86 -0
  276. teradataml/data/docs/sqle/docs_17_20/ChiSq.py +90 -0
  277. teradataml/data/docs/sqle/docs_17_20/ClassificationEvaluator.py +166 -0
  278. teradataml/data/docs/sqle/docs_17_20/ColumnSummary.py +86 -0
  279. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +246 -0
  280. teradataml/data/docs/sqle/docs_17_20/ConvertTo.py +113 -0
  281. teradataml/data/docs/sqle/docs_17_20/DecisionForest.py +280 -0
  282. teradataml/data/docs/sqle/docs_17_20/DecisionForestPredict.py +144 -0
  283. teradataml/data/docs/sqle/docs_17_20/DecisionTreePredict.py +136 -0
  284. teradataml/data/docs/sqle/docs_17_20/FTest.py +240 -0
  285. teradataml/data/docs/sqle/docs_17_20/FillRowId.py +83 -0
  286. teradataml/data/docs/sqle/docs_17_20/Fit.py +88 -0
  287. teradataml/data/docs/sqle/docs_17_20/GLM.py +541 -0
  288. teradataml/data/docs/sqle/docs_17_20/GLMPerSegment.py +415 -0
  289. teradataml/data/docs/sqle/docs_17_20/GLMPredict.py +144 -0
  290. teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +233 -0
  291. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +125 -0
  292. teradataml/data/docs/sqle/docs_17_20/GetRowsWithMissingValues.py +109 -0
  293. teradataml/data/docs/sqle/docs_17_20/GetRowsWithoutMissingValues.py +106 -0
  294. teradataml/data/docs/sqle/docs_17_20/Histogram.py +224 -0
  295. teradataml/data/docs/sqle/docs_17_20/KMeans.py +251 -0
  296. teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +144 -0
  297. teradataml/data/docs/sqle/docs_17_20/KNN.py +215 -0
  298. teradataml/data/docs/sqle/docs_17_20/MovingAverage.py +134 -0
  299. teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
  300. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +209 -0
  301. teradataml/data/docs/sqle/docs_17_20/NPath.py +266 -0
  302. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
  303. teradataml/data/docs/sqle/docs_17_20/NaiveBayesPredict.py +116 -0
  304. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +177 -0
  305. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +127 -0
  306. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +119 -0
  307. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +112 -0
  308. teradataml/data/docs/sqle/docs_17_20/NumApply.py +147 -0
  309. teradataml/data/docs/sqle/docs_17_20/OneClassSVM.py +307 -0
  310. teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +185 -0
  311. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +231 -0
  312. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +121 -0
  313. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingFit.py +220 -0
  314. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingTransform.py +127 -0
  315. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +191 -0
  316. teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +117 -0
  317. teradataml/data/docs/sqle/docs_17_20/Pack.py +128 -0
  318. teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
  319. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesFit.py +112 -0
  320. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +112 -0
  321. teradataml/data/docs/sqle/docs_17_20/QQNorm.py +105 -0
  322. teradataml/data/docs/sqle/docs_17_20/ROC.py +164 -0
  323. teradataml/data/docs/sqle/docs_17_20/RandomProjectionFit.py +155 -0
  324. teradataml/data/docs/sqle/docs_17_20/RandomProjectionMinComponents.py +106 -0
  325. teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +120 -0
  326. teradataml/data/docs/sqle/docs_17_20/RegressionEvaluator.py +211 -0
  327. teradataml/data/docs/sqle/docs_17_20/RoundColumns.py +109 -0
  328. teradataml/data/docs/sqle/docs_17_20/RowNormalizeFit.py +118 -0
  329. teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +111 -0
  330. teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
  331. teradataml/data/docs/sqle/docs_17_20/SVM.py +414 -0
  332. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +213 -0
  333. teradataml/data/docs/sqle/docs_17_20/SVMSparsePredict.py +153 -0
  334. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +315 -0
  335. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +202 -0
  336. teradataml/data/docs/sqle/docs_17_20/SentimentExtractor.py +206 -0
  337. teradataml/data/docs/sqle/docs_17_20/Sessionize.py +114 -0
  338. teradataml/data/docs/sqle/docs_17_20/Shap.py +225 -0
  339. teradataml/data/docs/sqle/docs_17_20/Silhouette.py +153 -0
  340. teradataml/data/docs/sqle/docs_17_20/SimpleImputeFit.py +116 -0
  341. teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +109 -0
  342. teradataml/data/docs/sqle/docs_17_20/StrApply.py +187 -0
  343. teradataml/data/docs/sqle/docs_17_20/StringSimilarity.py +146 -0
  344. teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +207 -0
  345. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +333 -0
  346. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
  347. teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
  348. teradataml/data/docs/sqle/docs_17_20/TargetEncodingFit.py +267 -0
  349. teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +141 -0
  350. teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
  351. teradataml/data/docs/sqle/docs_17_20/TextParser.py +224 -0
  352. teradataml/data/docs/sqle/docs_17_20/TrainTestSplit.py +160 -0
  353. teradataml/data/docs/sqle/docs_17_20/Transform.py +123 -0
  354. teradataml/data/docs/sqle/docs_17_20/UnivariateStatistics.py +142 -0
  355. teradataml/data/docs/sqle/docs_17_20/Unpack.py +214 -0
  356. teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
  357. teradataml/data/docs/sqle/docs_17_20/VectorDistance.py +169 -0
  358. teradataml/data/docs/sqle/docs_17_20/WhichMax.py +83 -0
  359. teradataml/data/docs/sqle/docs_17_20/WhichMin.py +83 -0
  360. teradataml/data/docs/sqle/docs_17_20/WordEmbeddings.py +237 -0
  361. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +362 -0
  362. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +281 -0
  363. teradataml/data/docs/sqle/docs_17_20/ZTest.py +220 -0
  364. teradataml/data/docs/sqle/docs_17_20/__init__.py +0 -0
  365. teradataml/data/docs/tableoperator/__init__.py +0 -0
  366. teradataml/data/docs/tableoperator/docs_17_00/ReadNOS.py +430 -0
  367. teradataml/data/docs/tableoperator/docs_17_00/__init__.py +0 -0
  368. teradataml/data/docs/tableoperator/docs_17_05/ReadNOS.py +430 -0
  369. teradataml/data/docs/tableoperator/docs_17_05/WriteNOS.py +348 -0
  370. teradataml/data/docs/tableoperator/docs_17_05/__init__.py +0 -0
  371. teradataml/data/docs/tableoperator/docs_17_10/ReadNOS.py +429 -0
  372. teradataml/data/docs/tableoperator/docs_17_10/WriteNOS.py +348 -0
  373. teradataml/data/docs/tableoperator/docs_17_10/__init__.py +0 -0
  374. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  375. teradataml/data/docs/tableoperator/docs_17_20/ReadNOS.py +440 -0
  376. teradataml/data/docs/tableoperator/docs_17_20/WriteNOS.py +387 -0
  377. teradataml/data/docs/tableoperator/docs_17_20/__init__.py +0 -0
  378. teradataml/data/docs/uaf/__init__.py +0 -0
  379. teradataml/data/docs/uaf/docs_17_20/ACF.py +186 -0
  380. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +370 -0
  381. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +172 -0
  382. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +161 -0
  383. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
  384. teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
  385. teradataml/data/docs/uaf/docs_17_20/BinaryMatrixOp.py +248 -0
  386. teradataml/data/docs/uaf/docs_17_20/BinarySeriesOp.py +252 -0
  387. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +178 -0
  388. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +175 -0
  389. teradataml/data/docs/uaf/docs_17_20/Convolve.py +230 -0
  390. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +218 -0
  391. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  392. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +185 -0
  393. teradataml/data/docs/uaf/docs_17_20/DFFT.py +204 -0
  394. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +216 -0
  395. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +216 -0
  396. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +192 -0
  397. teradataml/data/docs/uaf/docs_17_20/DIFF.py +175 -0
  398. teradataml/data/docs/uaf/docs_17_20/DTW.py +180 -0
  399. teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
  400. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +217 -0
  401. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +142 -0
  402. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +184 -0
  403. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +185 -0
  404. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
  405. teradataml/data/docs/uaf/docs_17_20/FitMetrics.py +172 -0
  406. teradataml/data/docs/uaf/docs_17_20/GenseriesFormula.py +206 -0
  407. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +143 -0
  408. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +198 -0
  409. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +260 -0
  410. teradataml/data/docs/uaf/docs_17_20/IDFFT.py +165 -0
  411. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +191 -0
  412. teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
  413. teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
  414. teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
  415. teradataml/data/docs/uaf/docs_17_20/InputValidator.py +121 -0
  416. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +156 -0
  417. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +215 -0
  418. teradataml/data/docs/uaf/docs_17_20/MAMean.py +174 -0
  419. teradataml/data/docs/uaf/docs_17_20/MInfo.py +134 -0
  420. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
  421. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +145 -0
  422. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +191 -0
  423. teradataml/data/docs/uaf/docs_17_20/PACF.py +157 -0
  424. teradataml/data/docs/uaf/docs_17_20/Portman.py +217 -0
  425. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +203 -0
  426. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +155 -0
  427. teradataml/data/docs/uaf/docs_17_20/Resample.py +237 -0
  428. teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
  429. teradataml/data/docs/uaf/docs_17_20/SInfo.py +123 -0
  430. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +173 -0
  431. teradataml/data/docs/uaf/docs_17_20/SelectionCriteria.py +174 -0
  432. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +171 -0
  433. teradataml/data/docs/uaf/docs_17_20/SignifResidmean.py +164 -0
  434. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +180 -0
  435. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +208 -0
  436. teradataml/data/docs/uaf/docs_17_20/TrackingOp.py +151 -0
  437. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +171 -0
  438. teradataml/data/docs/uaf/docs_17_20/Unnormalize.py +202 -0
  439. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +171 -0
  440. teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
  441. teradataml/data/docs/uaf/docs_17_20/__init__.py +0 -0
  442. teradataml/data/dtw_example.json +18 -0
  443. teradataml/data/dtw_t1.csv +11 -0
  444. teradataml/data/dtw_t2.csv +4 -0
  445. teradataml/data/dwt2d_dataTable.csv +65 -0
  446. teradataml/data/dwt2d_example.json +16 -0
  447. teradataml/data/dwt_dataTable.csv +8 -0
  448. teradataml/data/dwt_example.json +15 -0
  449. teradataml/data/dwt_filterTable.csv +3 -0
  450. teradataml/data/dwt_filter_dim.csv +5 -0
  451. teradataml/data/emission.csv +9 -0
  452. teradataml/data/emp_table_by_dept.csv +19 -0
  453. teradataml/data/employee_info.csv +4 -0
  454. teradataml/data/employee_table.csv +6 -0
  455. teradataml/data/excluding_event_table.csv +2 -0
  456. teradataml/data/finance_data.csv +6 -0
  457. teradataml/data/finance_data2.csv +61 -0
  458. teradataml/data/finance_data3.csv +93 -0
  459. teradataml/data/finance_data4.csv +13 -0
  460. teradataml/data/fish.csv +160 -0
  461. teradataml/data/fm_blood2ageandweight.csv +26 -0
  462. teradataml/data/fmeasure_example.json +12 -0
  463. teradataml/data/followers_leaders.csv +10 -0
  464. teradataml/data/fpgrowth_example.json +12 -0
  465. teradataml/data/frequentpaths_example.json +29 -0
  466. teradataml/data/friends.csv +9 -0
  467. teradataml/data/fs_input.csv +33 -0
  468. teradataml/data/fs_input1.csv +33 -0
  469. teradataml/data/genData.csv +513 -0
  470. teradataml/data/geodataframe_example.json +40 -0
  471. teradataml/data/glass_types.csv +215 -0
  472. teradataml/data/glm_admissions_model.csv +12 -0
  473. teradataml/data/glm_example.json +56 -0
  474. teradataml/data/glml1l2_example.json +28 -0
  475. teradataml/data/glml1l2predict_example.json +54 -0
  476. teradataml/data/glmpredict_example.json +54 -0
  477. teradataml/data/gq_t1.csv +21 -0
  478. teradataml/data/grocery_transaction.csv +19 -0
  479. teradataml/data/hconvolve_complex_right.csv +5 -0
  480. teradataml/data/hconvolve_complex_rightmulti.csv +5 -0
  481. teradataml/data/histogram_example.json +12 -0
  482. teradataml/data/hmmdecoder_example.json +79 -0
  483. teradataml/data/hmmevaluator_example.json +25 -0
  484. teradataml/data/hmmsupervised_example.json +10 -0
  485. teradataml/data/hmmunsupervised_example.json +8 -0
  486. teradataml/data/hnsw_alter_data.csv +5 -0
  487. teradataml/data/hnsw_data.csv +10 -0
  488. teradataml/data/house_values.csv +12 -0
  489. teradataml/data/house_values2.csv +13 -0
  490. teradataml/data/housing_cat.csv +7 -0
  491. teradataml/data/housing_data.csv +9 -0
  492. teradataml/data/housing_test.csv +47 -0
  493. teradataml/data/housing_test_binary.csv +47 -0
  494. teradataml/data/housing_train.csv +493 -0
  495. teradataml/data/housing_train_attribute.csv +5 -0
  496. teradataml/data/housing_train_binary.csv +437 -0
  497. teradataml/data/housing_train_parameter.csv +2 -0
  498. teradataml/data/housing_train_response.csv +493 -0
  499. teradataml/data/housing_train_segment.csv +201 -0
  500. teradataml/data/ibm_stock.csv +370 -0
  501. teradataml/data/ibm_stock1.csv +370 -0
  502. teradataml/data/identitymatch_example.json +22 -0
  503. teradataml/data/idf_table.csv +4 -0
  504. teradataml/data/idwt2d_dataTable.csv +5 -0
  505. teradataml/data/idwt_dataTable.csv +8 -0
  506. teradataml/data/idwt_filterTable.csv +3 -0
  507. teradataml/data/impressions.csv +101 -0
  508. teradataml/data/inflation.csv +21 -0
  509. teradataml/data/initial.csv +3 -0
  510. teradataml/data/insect2Cols.csv +61 -0
  511. teradataml/data/insect_sprays.csv +13 -0
  512. teradataml/data/insurance.csv +1339 -0
  513. teradataml/data/interpolator_example.json +13 -0
  514. teradataml/data/interval_data.csv +5 -0
  515. teradataml/data/iris_altinput.csv +481 -0
  516. teradataml/data/iris_attribute_output.csv +8 -0
  517. teradataml/data/iris_attribute_test.csv +121 -0
  518. teradataml/data/iris_attribute_train.csv +481 -0
  519. teradataml/data/iris_category_expect_predict.csv +31 -0
  520. teradataml/data/iris_data.csv +151 -0
  521. teradataml/data/iris_input.csv +151 -0
  522. teradataml/data/iris_response_train.csv +121 -0
  523. teradataml/data/iris_test.csv +31 -0
  524. teradataml/data/iris_train.csv +121 -0
  525. teradataml/data/join_table1.csv +4 -0
  526. teradataml/data/join_table2.csv +4 -0
  527. teradataml/data/jsons/anly_function_name.json +7 -0
  528. teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
  529. teradataml/data/jsons/byom/dataikupredict.json +148 -0
  530. teradataml/data/jsons/byom/datarobotpredict.json +147 -0
  531. teradataml/data/jsons/byom/h2opredict.json +195 -0
  532. teradataml/data/jsons/byom/onnxembeddings.json +267 -0
  533. teradataml/data/jsons/byom/onnxpredict.json +187 -0
  534. teradataml/data/jsons/byom/pmmlpredict.json +147 -0
  535. teradataml/data/jsons/paired_functions.json +450 -0
  536. teradataml/data/jsons/sqle/16.20/Antiselect.json +56 -0
  537. teradataml/data/jsons/sqle/16.20/Attribution.json +249 -0
  538. teradataml/data/jsons/sqle/16.20/DecisionForestPredict.json +156 -0
  539. teradataml/data/jsons/sqle/16.20/DecisionTreePredict.json +170 -0
  540. teradataml/data/jsons/sqle/16.20/GLMPredict.json +122 -0
  541. teradataml/data/jsons/sqle/16.20/MovingAverage.json +367 -0
  542. teradataml/data/jsons/sqle/16.20/NGramSplitter.json +239 -0
  543. teradataml/data/jsons/sqle/16.20/NaiveBayesPredict.json +136 -0
  544. teradataml/data/jsons/sqle/16.20/NaiveBayesTextClassifierPredict.json +235 -0
  545. teradataml/data/jsons/sqle/16.20/Pack.json +98 -0
  546. teradataml/data/jsons/sqle/16.20/SVMSparsePredict.json +162 -0
  547. teradataml/data/jsons/sqle/16.20/Sessionize.json +105 -0
  548. teradataml/data/jsons/sqle/16.20/StringSimilarity.json +86 -0
  549. teradataml/data/jsons/sqle/16.20/Unpack.json +166 -0
  550. teradataml/data/jsons/sqle/16.20/nPath.json +269 -0
  551. teradataml/data/jsons/sqle/17.00/Antiselect.json +56 -0
  552. teradataml/data/jsons/sqle/17.00/Attribution.json +249 -0
  553. teradataml/data/jsons/sqle/17.00/DecisionForestPredict.json +156 -0
  554. teradataml/data/jsons/sqle/17.00/DecisionTreePredict.json +170 -0
  555. teradataml/data/jsons/sqle/17.00/GLMPredict.json +122 -0
  556. teradataml/data/jsons/sqle/17.00/MovingAverage.json +367 -0
  557. teradataml/data/jsons/sqle/17.00/NGramSplitter.json +239 -0
  558. teradataml/data/jsons/sqle/17.00/NaiveBayesPredict.json +136 -0
  559. teradataml/data/jsons/sqle/17.00/NaiveBayesTextClassifierPredict.json +235 -0
  560. teradataml/data/jsons/sqle/17.00/Pack.json +98 -0
  561. teradataml/data/jsons/sqle/17.00/SVMSparsePredict.json +162 -0
  562. teradataml/data/jsons/sqle/17.00/Sessionize.json +105 -0
  563. teradataml/data/jsons/sqle/17.00/StringSimilarity.json +86 -0
  564. teradataml/data/jsons/sqle/17.00/Unpack.json +166 -0
  565. teradataml/data/jsons/sqle/17.00/nPath.json +269 -0
  566. teradataml/data/jsons/sqle/17.05/Antiselect.json +56 -0
  567. teradataml/data/jsons/sqle/17.05/Attribution.json +249 -0
  568. teradataml/data/jsons/sqle/17.05/DecisionForestPredict.json +156 -0
  569. teradataml/data/jsons/sqle/17.05/DecisionTreePredict.json +170 -0
  570. teradataml/data/jsons/sqle/17.05/GLMPredict.json +122 -0
  571. teradataml/data/jsons/sqle/17.05/MovingAverage.json +367 -0
  572. teradataml/data/jsons/sqle/17.05/NGramSplitter.json +239 -0
  573. teradataml/data/jsons/sqle/17.05/NaiveBayesPredict.json +136 -0
  574. teradataml/data/jsons/sqle/17.05/NaiveBayesTextClassifierPredict.json +235 -0
  575. teradataml/data/jsons/sqle/17.05/Pack.json +98 -0
  576. teradataml/data/jsons/sqle/17.05/SVMSparsePredict.json +162 -0
  577. teradataml/data/jsons/sqle/17.05/Sessionize.json +105 -0
  578. teradataml/data/jsons/sqle/17.05/StringSimilarity.json +86 -0
  579. teradataml/data/jsons/sqle/17.05/Unpack.json +166 -0
  580. teradataml/data/jsons/sqle/17.05/nPath.json +269 -0
  581. teradataml/data/jsons/sqle/17.10/Antiselect.json +56 -0
  582. teradataml/data/jsons/sqle/17.10/Attribution.json +249 -0
  583. teradataml/data/jsons/sqle/17.10/DecisionForestPredict.json +185 -0
  584. teradataml/data/jsons/sqle/17.10/DecisionTreePredict.json +172 -0
  585. teradataml/data/jsons/sqle/17.10/GLMPredict.json +151 -0
  586. teradataml/data/jsons/sqle/17.10/MovingAverage.json +368 -0
  587. teradataml/data/jsons/sqle/17.10/NGramSplitter.json +239 -0
  588. teradataml/data/jsons/sqle/17.10/NaiveBayesPredict.json +149 -0
  589. teradataml/data/jsons/sqle/17.10/NaiveBayesTextClassifierPredict.json +288 -0
  590. teradataml/data/jsons/sqle/17.10/Pack.json +133 -0
  591. teradataml/data/jsons/sqle/17.10/SVMSparsePredict.json +193 -0
  592. teradataml/data/jsons/sqle/17.10/Sessionize.json +105 -0
  593. teradataml/data/jsons/sqle/17.10/StringSimilarity.json +86 -0
  594. teradataml/data/jsons/sqle/17.10/TD_BinCodeFit.json +239 -0
  595. teradataml/data/jsons/sqle/17.10/TD_BinCodeTransform.json +70 -0
  596. teradataml/data/jsons/sqle/17.10/TD_CategoricalSummary.json +54 -0
  597. teradataml/data/jsons/sqle/17.10/TD_Chisq.json +68 -0
  598. teradataml/data/jsons/sqle/17.10/TD_ColumnSummary.json +54 -0
  599. teradataml/data/jsons/sqle/17.10/TD_ConvertTo.json +69 -0
  600. teradataml/data/jsons/sqle/17.10/TD_FTest.json +187 -0
  601. teradataml/data/jsons/sqle/17.10/TD_FillRowID.json +52 -0
  602. teradataml/data/jsons/sqle/17.10/TD_FunctionFit.json +46 -0
  603. teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +72 -0
  604. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithMissingValues.json +53 -0
  605. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithoutMissingValues.json +53 -0
  606. teradataml/data/jsons/sqle/17.10/TD_Histogram.json +133 -0
  607. teradataml/data/jsons/sqle/17.10/TD_NumApply.json +147 -0
  608. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingFit.json +183 -0
  609. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +66 -0
  610. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterFit.json +197 -0
  611. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +48 -0
  612. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesFit.json +114 -0
  613. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +72 -0
  614. teradataml/data/jsons/sqle/17.10/TD_QQNorm.json +112 -0
  615. teradataml/data/jsons/sqle/17.10/TD_RoundColumns.json +93 -0
  616. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeFit.json +128 -0
  617. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +71 -0
  618. teradataml/data/jsons/sqle/17.10/TD_ScaleFit.json +157 -0
  619. teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +71 -0
  620. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeFit.json +148 -0
  621. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +48 -0
  622. teradataml/data/jsons/sqle/17.10/TD_StrApply.json +240 -0
  623. teradataml/data/jsons/sqle/17.10/TD_UnivariateStatistics.json +119 -0
  624. teradataml/data/jsons/sqle/17.10/TD_WhichMax.json +53 -0
  625. teradataml/data/jsons/sqle/17.10/TD_WhichMin.json +53 -0
  626. teradataml/data/jsons/sqle/17.10/TD_ZTest.json +171 -0
  627. teradataml/data/jsons/sqle/17.10/Unpack.json +188 -0
  628. teradataml/data/jsons/sqle/17.10/nPath.json +269 -0
  629. teradataml/data/jsons/sqle/17.20/Antiselect.json +56 -0
  630. teradataml/data/jsons/sqle/17.20/Attribution.json +249 -0
  631. teradataml/data/jsons/sqle/17.20/DecisionForestPredict.json +185 -0
  632. teradataml/data/jsons/sqle/17.20/DecisionTreePredict.json +172 -0
  633. teradataml/data/jsons/sqle/17.20/GLMPredict.json +151 -0
  634. teradataml/data/jsons/sqle/17.20/MovingAverage.json +367 -0
  635. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +239 -0
  636. teradataml/data/jsons/sqle/17.20/NaiveBayesPredict.json +149 -0
  637. teradataml/data/jsons/sqle/17.20/NaiveBayesTextClassifierPredict.json +287 -0
  638. teradataml/data/jsons/sqle/17.20/Pack.json +133 -0
  639. teradataml/data/jsons/sqle/17.20/SVMSparsePredict.json +192 -0
  640. teradataml/data/jsons/sqle/17.20/Sessionize.json +105 -0
  641. teradataml/data/jsons/sqle/17.20/StringSimilarity.json +86 -0
  642. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +149 -0
  643. teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
  644. teradataml/data/jsons/sqle/17.20/TD_BinCodeFit.json +239 -0
  645. teradataml/data/jsons/sqle/17.20/TD_BinCodeTransform.json +71 -0
  646. teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
  647. teradataml/data/jsons/sqle/17.20/TD_CategoricalSummary.json +53 -0
  648. teradataml/data/jsons/sqle/17.20/TD_Chisq.json +68 -0
  649. teradataml/data/jsons/sqle/17.20/TD_ClassificationEvaluator.json +146 -0
  650. teradataml/data/jsons/sqle/17.20/TD_ColumnSummary.json +53 -0
  651. teradataml/data/jsons/sqle/17.20/TD_ColumnTransformer.json +218 -0
  652. teradataml/data/jsons/sqle/17.20/TD_ConvertTo.json +92 -0
  653. teradataml/data/jsons/sqle/17.20/TD_DecisionForest.json +260 -0
  654. teradataml/data/jsons/sqle/17.20/TD_DecisionForestPredict.json +139 -0
  655. teradataml/data/jsons/sqle/17.20/TD_FTest.json +269 -0
  656. teradataml/data/jsons/sqle/17.20/TD_FillRowID.json +52 -0
  657. teradataml/data/jsons/sqle/17.20/TD_FunctionFit.json +46 -0
  658. teradataml/data/jsons/sqle/17.20/TD_FunctionTransform.json +72 -0
  659. teradataml/data/jsons/sqle/17.20/TD_GLM.json +507 -0
  660. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +168 -0
  661. teradataml/data/jsons/sqle/17.20/TD_GLMPerSegment.json +411 -0
  662. teradataml/data/jsons/sqle/17.20/TD_GLMPredictPerSegment.json +146 -0
  663. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +93 -0
  664. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithMissingValues.json +76 -0
  665. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithoutMissingValues.json +76 -0
  666. teradataml/data/jsons/sqle/17.20/TD_Histogram.json +152 -0
  667. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +232 -0
  668. teradataml/data/jsons/sqle/17.20/TD_KMeansPredict.json +87 -0
  669. teradataml/data/jsons/sqle/17.20/TD_KNN.json +262 -0
  670. teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
  671. teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
  672. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
  673. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesTextClassifierTrainer.json +137 -0
  674. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +102 -0
  675. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineTransform.json +71 -0
  676. teradataml/data/jsons/sqle/17.20/TD_NumApply.json +147 -0
  677. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +316 -0
  678. teradataml/data/jsons/sqle/17.20/TD_OneClassSVMPredict.json +124 -0
  679. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingFit.json +271 -0
  680. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingTransform.json +65 -0
  681. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingFit.json +229 -0
  682. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingTransform.json +75 -0
  683. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterFit.json +217 -0
  684. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterTransform.json +48 -0
  685. teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
  686. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesFit.json +114 -0
  687. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesTransform.json +72 -0
  688. teradataml/data/jsons/sqle/17.20/TD_QQNorm.json +111 -0
  689. teradataml/data/jsons/sqle/17.20/TD_ROC.json +179 -0
  690. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionFit.json +179 -0
  691. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionMinComponents.json +74 -0
  692. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionTransform.json +74 -0
  693. teradataml/data/jsons/sqle/17.20/TD_RegressionEvaluator.json +138 -0
  694. teradataml/data/jsons/sqle/17.20/TD_RoundColumns.json +93 -0
  695. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeFit.json +128 -0
  696. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeTransform.json +71 -0
  697. teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
  698. teradataml/data/jsons/sqle/17.20/TD_SVM.json +389 -0
  699. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +142 -0
  700. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +310 -0
  701. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +120 -0
  702. teradataml/data/jsons/sqle/17.20/TD_SentimentExtractor.json +194 -0
  703. teradataml/data/jsons/sqle/17.20/TD_Shap.json +221 -0
  704. teradataml/data/jsons/sqle/17.20/TD_Silhouette.json +143 -0
  705. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeFit.json +147 -0
  706. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeTransform.json +48 -0
  707. teradataml/data/jsons/sqle/17.20/TD_StrApply.json +240 -0
  708. teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
  709. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingFit.json +248 -0
  710. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingTransform.json +75 -0
  711. teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
  712. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +297 -0
  713. teradataml/data/jsons/sqle/17.20/TD_TrainTestSplit.json +142 -0
  714. teradataml/data/jsons/sqle/17.20/TD_UnivariateStatistics.json +117 -0
  715. teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
  716. teradataml/data/jsons/sqle/17.20/TD_VectorDistance.json +183 -0
  717. teradataml/data/jsons/sqle/17.20/TD_WhichMax.json +53 -0
  718. teradataml/data/jsons/sqle/17.20/TD_WhichMin.json +53 -0
  719. teradataml/data/jsons/sqle/17.20/TD_WordEmbeddings.json +241 -0
  720. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +330 -0
  721. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +195 -0
  722. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +247 -0
  723. teradataml/data/jsons/sqle/17.20/Unpack.json +188 -0
  724. teradataml/data/jsons/sqle/17.20/nPath.json +269 -0
  725. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +370 -0
  726. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +460 -0
  727. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +385 -0
  728. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +369 -0
  729. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +369 -0
  730. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +369 -0
  731. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +369 -0
  732. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +400 -0
  733. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +401 -0
  734. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +384 -0
  735. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +384 -0
  736. teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
  737. teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
  738. teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
  739. teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
  740. teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
  741. teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
  742. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  743. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  744. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  745. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  746. teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
  747. teradataml/data/jsons/tableoperator/17.00/read_nos.json +198 -0
  748. teradataml/data/jsons/tableoperator/17.05/read_nos.json +198 -0
  749. teradataml/data/jsons/tableoperator/17.05/write_nos.json +195 -0
  750. teradataml/data/jsons/tableoperator/17.10/read_nos.json +184 -0
  751. teradataml/data/jsons/tableoperator/17.10/write_nos.json +195 -0
  752. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  753. teradataml/data/jsons/tableoperator/17.20/read_nos.json +183 -0
  754. teradataml/data/jsons/tableoperator/17.20/write_nos.json +224 -0
  755. teradataml/data/jsons/uaf/17.20/TD_ACF.json +132 -0
  756. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +396 -0
  757. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +77 -0
  758. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +153 -0
  759. teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
  760. teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
  761. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +107 -0
  762. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +106 -0
  763. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +89 -0
  764. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +104 -0
  765. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +78 -0
  766. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +66 -0
  767. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +87 -0
  768. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +134 -0
  769. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +144 -0
  770. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +108 -0
  771. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +108 -0
  772. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +78 -0
  773. teradataml/data/jsons/uaf/17.20/TD_DIFF.json +92 -0
  774. teradataml/data/jsons/uaf/17.20/TD_DTW.json +114 -0
  775. teradataml/data/jsons/uaf/17.20/TD_DURBIN_WATSON.json +101 -0
  776. teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
  777. teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
  778. teradataml/data/jsons/uaf/17.20/TD_EXTRACT_RESULTS.json +39 -0
  779. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +101 -0
  780. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4FORMULA.json +85 -0
  781. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4SINUSOIDS.json +71 -0
  782. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +139 -0
  783. teradataml/data/jsons/uaf/17.20/TD_HOLT_WINTERS_FORECASTER.json +313 -0
  784. teradataml/data/jsons/uaf/17.20/TD_IDFFT.json +58 -0
  785. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +81 -0
  786. teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
  787. teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
  788. teradataml/data/jsons/uaf/17.20/TD_INPUTVALIDATOR.json +64 -0
  789. teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
  790. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +182 -0
  791. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +103 -0
  792. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +181 -0
  793. teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
  794. teradataml/data/jsons/uaf/17.20/TD_MATRIXMULTIPLY.json +68 -0
  795. teradataml/data/jsons/uaf/17.20/TD_MINFO.json +67 -0
  796. teradataml/data/jsons/uaf/17.20/TD_MULTIVAR_REGR.json +179 -0
  797. teradataml/data/jsons/uaf/17.20/TD_PACF.json +114 -0
  798. teradataml/data/jsons/uaf/17.20/TD_PORTMAN.json +119 -0
  799. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +175 -0
  800. teradataml/data/jsons/uaf/17.20/TD_POWERTRANSFORM.json +98 -0
  801. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +194 -0
  802. teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
  803. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +143 -0
  804. teradataml/data/jsons/uaf/17.20/TD_SELECTION_CRITERIA.json +90 -0
  805. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_PERIODICITIES.json +80 -0
  806. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_RESIDMEAN.json +68 -0
  807. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +184 -0
  808. teradataml/data/jsons/uaf/17.20/TD_SINFO.json +58 -0
  809. teradataml/data/jsons/uaf/17.20/TD_SMOOTHMA.json +163 -0
  810. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +101 -0
  811. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +112 -0
  812. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +95 -0
  813. teradataml/data/jsons/uaf/17.20/TD_WHITES_GENERAL.json +78 -0
  814. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
  815. teradataml/data/kmeans_example.json +23 -0
  816. teradataml/data/kmeans_table.csv +10 -0
  817. teradataml/data/kmeans_us_arrests_data.csv +51 -0
  818. teradataml/data/knn_example.json +19 -0
  819. teradataml/data/knnrecommender_example.json +7 -0
  820. teradataml/data/knnrecommenderpredict_example.json +12 -0
  821. teradataml/data/lar_example.json +17 -0
  822. teradataml/data/larpredict_example.json +30 -0
  823. teradataml/data/lc_new_predictors.csv +5 -0
  824. teradataml/data/lc_new_reference.csv +9 -0
  825. teradataml/data/lda_example.json +9 -0
  826. teradataml/data/ldainference_example.json +15 -0
  827. teradataml/data/ldatopicsummary_example.json +9 -0
  828. teradataml/data/levendist_input.csv +13 -0
  829. teradataml/data/levenshteindistance_example.json +10 -0
  830. teradataml/data/linreg_example.json +10 -0
  831. teradataml/data/load_example_data.py +350 -0
  832. teradataml/data/loan_prediction.csv +295 -0
  833. teradataml/data/lungcancer.csv +138 -0
  834. teradataml/data/mappingdata.csv +12 -0
  835. teradataml/data/medical_readings.csv +101 -0
  836. teradataml/data/milk_timeseries.csv +157 -0
  837. teradataml/data/min_max_titanic.csv +4 -0
  838. teradataml/data/minhash_example.json +6 -0
  839. teradataml/data/ml_ratings.csv +7547 -0
  840. teradataml/data/ml_ratings_10.csv +2445 -0
  841. teradataml/data/mobile_data.csv +13 -0
  842. teradataml/data/model1_table.csv +5 -0
  843. teradataml/data/model2_table.csv +5 -0
  844. teradataml/data/models/License_file.txt +1 -0
  845. teradataml/data/models/License_file_empty.txt +0 -0
  846. teradataml/data/models/dataiku_iris_data_ann_thin +0 -0
  847. teradataml/data/models/dr_iris_rf +0 -0
  848. teradataml/data/models/iris_db_dt_model_sklearn.onnx +0 -0
  849. teradataml/data/models/iris_db_dt_model_sklearn_floattensor.onnx +0 -0
  850. teradataml/data/models/iris_db_glm_model.pmml +57 -0
  851. teradataml/data/models/iris_db_xgb_model.pmml +4471 -0
  852. teradataml/data/models/iris_kmeans_model +0 -0
  853. teradataml/data/models/iris_mojo_glm_h2o_model +0 -0
  854. teradataml/data/models/iris_mojo_xgb_h2o_model +0 -0
  855. teradataml/data/modularity_example.json +12 -0
  856. teradataml/data/movavg_example.json +8 -0
  857. teradataml/data/mtx1.csv +7 -0
  858. teradataml/data/mtx2.csv +13 -0
  859. teradataml/data/multi_model_classification.csv +401 -0
  860. teradataml/data/multi_model_regression.csv +401 -0
  861. teradataml/data/mvdfft8.csv +9 -0
  862. teradataml/data/naivebayes_example.json +10 -0
  863. teradataml/data/naivebayespredict_example.json +19 -0
  864. teradataml/data/naivebayestextclassifier2_example.json +7 -0
  865. teradataml/data/naivebayestextclassifier_example.json +8 -0
  866. teradataml/data/naivebayestextclassifierpredict_example.json +32 -0
  867. teradataml/data/name_Find_configure.csv +10 -0
  868. teradataml/data/namedentityfinder_example.json +14 -0
  869. teradataml/data/namedentityfinderevaluator_example.json +10 -0
  870. teradataml/data/namedentityfindertrainer_example.json +6 -0
  871. teradataml/data/nb_iris_input_test.csv +31 -0
  872. teradataml/data/nb_iris_input_train.csv +121 -0
  873. teradataml/data/nbp_iris_model.csv +13 -0
  874. teradataml/data/ner_dict.csv +8 -0
  875. teradataml/data/ner_extractor_text.csv +2 -0
  876. teradataml/data/ner_input_eng.csv +7 -0
  877. teradataml/data/ner_rule.csv +5 -0
  878. teradataml/data/ner_sports_test2.csv +29 -0
  879. teradataml/data/ner_sports_train.csv +501 -0
  880. teradataml/data/nerevaluator_example.json +6 -0
  881. teradataml/data/nerextractor_example.json +18 -0
  882. teradataml/data/nermem_sports_test.csv +18 -0
  883. teradataml/data/nermem_sports_train.csv +51 -0
  884. teradataml/data/nertrainer_example.json +7 -0
  885. teradataml/data/ngrams_example.json +7 -0
  886. teradataml/data/notebooks/__init__.py +0 -0
  887. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Aggregate Functions using SQLAlchemy.ipynb +1455 -0
  888. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Arithmetic Functions Using SQLAlchemy.ipynb +1993 -0
  889. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Bit-Byte Manipulation Functions using SQLAlchemy.ipynb +1492 -0
  890. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Built-in functions using SQLAlchemy.ipynb +536 -0
  891. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Regular Expressions Using SQLAlchemy.ipynb +570 -0
  892. teradataml/data/notebooks/sqlalchemy/Teradata Vantage String Functions Using SQLAlchemy.ipynb +2559 -0
  893. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Window Aggregate Functions using SQLAlchemy.ipynb +2911 -0
  894. teradataml/data/notebooks/sqlalchemy/Using Generic SQLAlchemy ClauseElements teradataml DataFrame assign method.ipynb +698 -0
  895. teradataml/data/notebooks/sqlalchemy/__init__.py +0 -0
  896. teradataml/data/notebooks/sqlalchemy/teradataml filtering using SQLAlchemy ClauseElements.ipynb +784 -0
  897. teradataml/data/npath_example.json +23 -0
  898. teradataml/data/ntree_example.json +14 -0
  899. teradataml/data/numeric_strings.csv +5 -0
  900. teradataml/data/numerics.csv +4 -0
  901. teradataml/data/ocean_buoy.csv +17 -0
  902. teradataml/data/ocean_buoy2.csv +17 -0
  903. teradataml/data/ocean_buoys.csv +28 -0
  904. teradataml/data/ocean_buoys2.csv +10 -0
  905. teradataml/data/ocean_buoys_nonpti.csv +28 -0
  906. teradataml/data/ocean_buoys_seq.csv +29 -0
  907. teradataml/data/onehot_encoder_train.csv +4 -0
  908. teradataml/data/openml_example.json +92 -0
  909. teradataml/data/optional_event_table.csv +4 -0
  910. teradataml/data/orders1.csv +11 -0
  911. teradataml/data/orders1_12.csv +13 -0
  912. teradataml/data/orders_ex.csv +4 -0
  913. teradataml/data/pack_example.json +9 -0
  914. teradataml/data/package_tracking.csv +19 -0
  915. teradataml/data/package_tracking_pti.csv +19 -0
  916. teradataml/data/pagerank_example.json +13 -0
  917. teradataml/data/paragraphs_input.csv +6 -0
  918. teradataml/data/pathanalyzer_example.json +8 -0
  919. teradataml/data/pathgenerator_example.json +8 -0
  920. teradataml/data/patient_profile.csv +101 -0
  921. teradataml/data/pattern_matching_data.csv +11 -0
  922. teradataml/data/payment_fraud_dataset.csv +10001 -0
  923. teradataml/data/peppers.png +0 -0
  924. teradataml/data/phrases.csv +7 -0
  925. teradataml/data/pivot_example.json +9 -0
  926. teradataml/data/pivot_input.csv +22 -0
  927. teradataml/data/playerRating.csv +31 -0
  928. teradataml/data/pos_input.csv +40 -0
  929. teradataml/data/postagger_example.json +7 -0
  930. teradataml/data/posttagger_output.csv +44 -0
  931. teradataml/data/production_data.csv +17 -0
  932. teradataml/data/production_data2.csv +7 -0
  933. teradataml/data/randomsample_example.json +32 -0
  934. teradataml/data/randomwalksample_example.json +9 -0
  935. teradataml/data/rank_table.csv +6 -0
  936. teradataml/data/real_values.csv +14 -0
  937. teradataml/data/ref_mobile_data.csv +4 -0
  938. teradataml/data/ref_mobile_data_dense.csv +2 -0
  939. teradataml/data/ref_url.csv +17 -0
  940. teradataml/data/restaurant_reviews.csv +7 -0
  941. teradataml/data/retail_churn_table.csv +27772 -0
  942. teradataml/data/river_data.csv +145 -0
  943. teradataml/data/roc_example.json +8 -0
  944. teradataml/data/roc_input.csv +101 -0
  945. teradataml/data/rule_inputs.csv +6 -0
  946. teradataml/data/rule_table.csv +2 -0
  947. teradataml/data/sales.csv +7 -0
  948. teradataml/data/sales_transaction.csv +501 -0
  949. teradataml/data/salesdata.csv +342 -0
  950. teradataml/data/sample_cities.csv +3 -0
  951. teradataml/data/sample_shapes.csv +11 -0
  952. teradataml/data/sample_streets.csv +3 -0
  953. teradataml/data/sampling_example.json +16 -0
  954. teradataml/data/sax_example.json +17 -0
  955. teradataml/data/scale_attributes.csv +3 -0
  956. teradataml/data/scale_example.json +74 -0
  957. teradataml/data/scale_housing.csv +11 -0
  958. teradataml/data/scale_housing_test.csv +6 -0
  959. teradataml/data/scale_input_part_sparse.csv +31 -0
  960. teradataml/data/scale_input_partitioned.csv +16 -0
  961. teradataml/data/scale_input_sparse.csv +11 -0
  962. teradataml/data/scale_parameters.csv +3 -0
  963. teradataml/data/scale_stat.csv +11 -0
  964. teradataml/data/scalebypartition_example.json +13 -0
  965. teradataml/data/scalemap_example.json +13 -0
  966. teradataml/data/scalesummary_example.json +12 -0
  967. teradataml/data/score_category.csv +101 -0
  968. teradataml/data/score_summary.csv +4 -0
  969. teradataml/data/script_example.json +10 -0
  970. teradataml/data/scripts/deploy_script.py +84 -0
  971. teradataml/data/scripts/lightgbm/dataset.template +175 -0
  972. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +264 -0
  973. teradataml/data/scripts/lightgbm/lightgbm_function.template +234 -0
  974. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +177 -0
  975. teradataml/data/scripts/mapper.R +20 -0
  976. teradataml/data/scripts/mapper.py +16 -0
  977. teradataml/data/scripts/mapper_replace.py +16 -0
  978. teradataml/data/scripts/sklearn/__init__.py +0 -0
  979. teradataml/data/scripts/sklearn/sklearn_fit.py +205 -0
  980. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +148 -0
  981. teradataml/data/scripts/sklearn/sklearn_function.template +144 -0
  982. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +166 -0
  983. teradataml/data/scripts/sklearn/sklearn_neighbors.py +161 -0
  984. teradataml/data/scripts/sklearn/sklearn_score.py +145 -0
  985. teradataml/data/scripts/sklearn/sklearn_transform.py +327 -0
  986. teradataml/data/sdk/modelops/modelops_spec.json +101737 -0
  987. teradataml/data/seeds.csv +10 -0
  988. teradataml/data/sentenceextractor_example.json +7 -0
  989. teradataml/data/sentiment_extract_input.csv +11 -0
  990. teradataml/data/sentiment_train.csv +16 -0
  991. teradataml/data/sentiment_word.csv +20 -0
  992. teradataml/data/sentiment_word_input.csv +20 -0
  993. teradataml/data/sentimentextractor_example.json +24 -0
  994. teradataml/data/sentimenttrainer_example.json +8 -0
  995. teradataml/data/sequence_table.csv +10 -0
  996. teradataml/data/seriessplitter_example.json +8 -0
  997. teradataml/data/sessionize_example.json +17 -0
  998. teradataml/data/sessionize_table.csv +116 -0
  999. teradataml/data/setop_test1.csv +24 -0
  1000. teradataml/data/setop_test2.csv +22 -0
  1001. teradataml/data/soc_nw_edges.csv +11 -0
  1002. teradataml/data/soc_nw_vertices.csv +8 -0
  1003. teradataml/data/souvenir_timeseries.csv +168 -0
  1004. teradataml/data/sparse_iris_attribute.csv +5 -0
  1005. teradataml/data/sparse_iris_test.csv +121 -0
  1006. teradataml/data/sparse_iris_train.csv +601 -0
  1007. teradataml/data/star1.csv +6 -0
  1008. teradataml/data/star_pivot.csv +8 -0
  1009. teradataml/data/state_transition.csv +5 -0
  1010. teradataml/data/stock_data.csv +53 -0
  1011. teradataml/data/stock_movement.csv +11 -0
  1012. teradataml/data/stock_vol.csv +76 -0
  1013. teradataml/data/stop_words.csv +8 -0
  1014. teradataml/data/store_sales.csv +37 -0
  1015. teradataml/data/stringsimilarity_example.json +8 -0
  1016. teradataml/data/strsimilarity_input.csv +13 -0
  1017. teradataml/data/students.csv +101 -0
  1018. teradataml/data/svm_iris_input_test.csv +121 -0
  1019. teradataml/data/svm_iris_input_train.csv +481 -0
  1020. teradataml/data/svm_iris_model.csv +7 -0
  1021. teradataml/data/svmdense_example.json +10 -0
  1022. teradataml/data/svmdensepredict_example.json +19 -0
  1023. teradataml/data/svmsparse_example.json +8 -0
  1024. teradataml/data/svmsparsepredict_example.json +14 -0
  1025. teradataml/data/svmsparsesummary_example.json +8 -0
  1026. teradataml/data/target_mobile_data.csv +13 -0
  1027. teradataml/data/target_mobile_data_dense.csv +5 -0
  1028. teradataml/data/target_udt_data.csv +8 -0
  1029. teradataml/data/tdnerextractor_example.json +14 -0
  1030. teradataml/data/templatedata.csv +1201 -0
  1031. teradataml/data/templates/open_source_ml.json +11 -0
  1032. teradataml/data/teradata_icon.ico +0 -0
  1033. teradataml/data/teradataml_example.json +1473 -0
  1034. teradataml/data/test_classification.csv +101 -0
  1035. teradataml/data/test_loan_prediction.csv +53 -0
  1036. teradataml/data/test_pacf_12.csv +37 -0
  1037. teradataml/data/test_prediction.csv +101 -0
  1038. teradataml/data/test_regression.csv +101 -0
  1039. teradataml/data/test_river2.csv +109 -0
  1040. teradataml/data/text_inputs.csv +6 -0
  1041. teradataml/data/textchunker_example.json +8 -0
  1042. teradataml/data/textclassifier_example.json +7 -0
  1043. teradataml/data/textclassifier_input.csv +7 -0
  1044. teradataml/data/textclassifiertrainer_example.json +7 -0
  1045. teradataml/data/textmorph_example.json +11 -0
  1046. teradataml/data/textparser_example.json +15 -0
  1047. teradataml/data/texttagger_example.json +12 -0
  1048. teradataml/data/texttokenizer_example.json +7 -0
  1049. teradataml/data/texttrainer_input.csv +11 -0
  1050. teradataml/data/tf_example.json +7 -0
  1051. teradataml/data/tfidf_example.json +14 -0
  1052. teradataml/data/tfidf_input1.csv +201 -0
  1053. teradataml/data/tfidf_train.csv +6 -0
  1054. teradataml/data/time_table1.csv +535 -0
  1055. teradataml/data/time_table2.csv +14 -0
  1056. teradataml/data/timeseriesdata.csv +1601 -0
  1057. teradataml/data/timeseriesdatasetsd4.csv +105 -0
  1058. teradataml/data/timestamp_data.csv +4 -0
  1059. teradataml/data/titanic.csv +892 -0
  1060. teradataml/data/titanic_dataset_unpivoted.csv +19 -0
  1061. teradataml/data/to_num_data.csv +4 -0
  1062. teradataml/data/tochar_data.csv +5 -0
  1063. teradataml/data/token_table.csv +696 -0
  1064. teradataml/data/train_multiclass.csv +101 -0
  1065. teradataml/data/train_regression.csv +101 -0
  1066. teradataml/data/train_regression_multiple_labels.csv +101 -0
  1067. teradataml/data/train_tracking.csv +28 -0
  1068. teradataml/data/trans_dense.csv +16 -0
  1069. teradataml/data/trans_sparse.csv +55 -0
  1070. teradataml/data/transformation_table.csv +6 -0
  1071. teradataml/data/transformation_table_new.csv +2 -0
  1072. teradataml/data/tv_spots.csv +16 -0
  1073. teradataml/data/twod_climate_data.csv +117 -0
  1074. teradataml/data/uaf_example.json +529 -0
  1075. teradataml/data/univariatestatistics_example.json +9 -0
  1076. teradataml/data/unpack_example.json +10 -0
  1077. teradataml/data/unpivot_example.json +25 -0
  1078. teradataml/data/unpivot_input.csv +8 -0
  1079. teradataml/data/url_data.csv +10 -0
  1080. teradataml/data/us_air_pass.csv +37 -0
  1081. teradataml/data/us_population.csv +624 -0
  1082. teradataml/data/us_states_shapes.csv +52 -0
  1083. teradataml/data/varmax_example.json +18 -0
  1084. teradataml/data/vectordistance_example.json +30 -0
  1085. teradataml/data/ville_climatedata.csv +121 -0
  1086. teradataml/data/ville_tempdata.csv +12 -0
  1087. teradataml/data/ville_tempdata1.csv +12 -0
  1088. teradataml/data/ville_temperature.csv +11 -0
  1089. teradataml/data/waveletTable.csv +1605 -0
  1090. teradataml/data/waveletTable2.csv +1605 -0
  1091. teradataml/data/weightedmovavg_example.json +9 -0
  1092. teradataml/data/wft_testing.csv +5 -0
  1093. teradataml/data/windowdfft.csv +16 -0
  1094. teradataml/data/wine_data.csv +1600 -0
  1095. teradataml/data/word_embed_input_table1.csv +6 -0
  1096. teradataml/data/word_embed_input_table2.csv +5 -0
  1097. teradataml/data/word_embed_model.csv +23 -0
  1098. teradataml/data/words_input.csv +13 -0
  1099. teradataml/data/xconvolve_complex_left.csv +6 -0
  1100. teradataml/data/xconvolve_complex_leftmulti.csv +6 -0
  1101. teradataml/data/xgboost_example.json +36 -0
  1102. teradataml/data/xgboostpredict_example.json +32 -0
  1103. teradataml/data/ztest_example.json +16 -0
  1104. teradataml/dataframe/__init__.py +0 -0
  1105. teradataml/dataframe/copy_to.py +2446 -0
  1106. teradataml/dataframe/data_transfer.py +2840 -0
  1107. teradataml/dataframe/dataframe.py +20908 -0
  1108. teradataml/dataframe/dataframe_utils.py +2114 -0
  1109. teradataml/dataframe/fastload.py +794 -0
  1110. teradataml/dataframe/functions.py +2110 -0
  1111. teradataml/dataframe/indexer.py +424 -0
  1112. teradataml/dataframe/row.py +160 -0
  1113. teradataml/dataframe/setop.py +1171 -0
  1114. teradataml/dataframe/sql.py +10904 -0
  1115. teradataml/dataframe/sql_function_parameters.py +440 -0
  1116. teradataml/dataframe/sql_functions.py +652 -0
  1117. teradataml/dataframe/sql_interfaces.py +220 -0
  1118. teradataml/dataframe/vantage_function_types.py +675 -0
  1119. teradataml/dataframe/window.py +694 -0
  1120. teradataml/dbutils/__init__.py +3 -0
  1121. teradataml/dbutils/dbutils.py +2871 -0
  1122. teradataml/dbutils/filemgr.py +318 -0
  1123. teradataml/gen_ai/__init__.py +2 -0
  1124. teradataml/gen_ai/convAI.py +473 -0
  1125. teradataml/geospatial/__init__.py +4 -0
  1126. teradataml/geospatial/geodataframe.py +1105 -0
  1127. teradataml/geospatial/geodataframecolumn.py +392 -0
  1128. teradataml/geospatial/geometry_types.py +926 -0
  1129. teradataml/hyperparameter_tuner/__init__.py +1 -0
  1130. teradataml/hyperparameter_tuner/optimizer.py +4115 -0
  1131. teradataml/hyperparameter_tuner/utils.py +303 -0
  1132. teradataml/lib/__init__.py +0 -0
  1133. teradataml/lib/aed_0_1.dll +0 -0
  1134. teradataml/lib/libaed_0_1.dylib +0 -0
  1135. teradataml/lib/libaed_0_1.so +0 -0
  1136. teradataml/lib/libaed_0_1_aarch64.so +0 -0
  1137. teradataml/lib/libaed_0_1_ppc64le.so +0 -0
  1138. teradataml/opensource/__init__.py +1 -0
  1139. teradataml/opensource/_base.py +1321 -0
  1140. teradataml/opensource/_class.py +464 -0
  1141. teradataml/opensource/_constants.py +61 -0
  1142. teradataml/opensource/_lightgbm.py +949 -0
  1143. teradataml/opensource/_sklearn.py +1008 -0
  1144. teradataml/opensource/_wrapper_utils.py +267 -0
  1145. teradataml/options/__init__.py +148 -0
  1146. teradataml/options/configure.py +489 -0
  1147. teradataml/options/display.py +187 -0
  1148. teradataml/plot/__init__.py +3 -0
  1149. teradataml/plot/axis.py +1427 -0
  1150. teradataml/plot/constants.py +15 -0
  1151. teradataml/plot/figure.py +431 -0
  1152. teradataml/plot/plot.py +810 -0
  1153. teradataml/plot/query_generator.py +83 -0
  1154. teradataml/plot/subplot.py +216 -0
  1155. teradataml/scriptmgmt/UserEnv.py +4273 -0
  1156. teradataml/scriptmgmt/__init__.py +3 -0
  1157. teradataml/scriptmgmt/lls_utils.py +2157 -0
  1158. teradataml/sdk/README.md +79 -0
  1159. teradataml/sdk/__init__.py +4 -0
  1160. teradataml/sdk/_auth_modes.py +422 -0
  1161. teradataml/sdk/_func_params.py +487 -0
  1162. teradataml/sdk/_json_parser.py +453 -0
  1163. teradataml/sdk/_openapi_spec_constants.py +249 -0
  1164. teradataml/sdk/_utils.py +236 -0
  1165. teradataml/sdk/api_client.py +900 -0
  1166. teradataml/sdk/constants.py +62 -0
  1167. teradataml/sdk/modelops/__init__.py +98 -0
  1168. teradataml/sdk/modelops/_client.py +409 -0
  1169. teradataml/sdk/modelops/_constants.py +304 -0
  1170. teradataml/sdk/modelops/models.py +2308 -0
  1171. teradataml/sdk/spinner.py +107 -0
  1172. teradataml/series/__init__.py +0 -0
  1173. teradataml/series/series.py +537 -0
  1174. teradataml/series/series_utils.py +71 -0
  1175. teradataml/store/__init__.py +12 -0
  1176. teradataml/store/feature_store/__init__.py +0 -0
  1177. teradataml/store/feature_store/constants.py +658 -0
  1178. teradataml/store/feature_store/feature_store.py +4814 -0
  1179. teradataml/store/feature_store/mind_map.py +639 -0
  1180. teradataml/store/feature_store/models.py +7330 -0
  1181. teradataml/store/feature_store/utils.py +390 -0
  1182. teradataml/table_operators/Apply.py +979 -0
  1183. teradataml/table_operators/Script.py +1739 -0
  1184. teradataml/table_operators/TableOperator.py +1343 -0
  1185. teradataml/table_operators/__init__.py +2 -0
  1186. teradataml/table_operators/apply_query_generator.py +262 -0
  1187. teradataml/table_operators/query_generator.py +493 -0
  1188. teradataml/table_operators/table_operator_query_generator.py +462 -0
  1189. teradataml/table_operators/table_operator_util.py +726 -0
  1190. teradataml/table_operators/templates/dataframe_apply.template +184 -0
  1191. teradataml/table_operators/templates/dataframe_map.template +176 -0
  1192. teradataml/table_operators/templates/dataframe_register.template +73 -0
  1193. teradataml/table_operators/templates/dataframe_udf.template +67 -0
  1194. teradataml/table_operators/templates/script_executor.template +170 -0
  1195. teradataml/telemetry_utils/__init__.py +0 -0
  1196. teradataml/telemetry_utils/queryband.py +53 -0
  1197. teradataml/utils/__init__.py +0 -0
  1198. teradataml/utils/docstring.py +527 -0
  1199. teradataml/utils/dtypes.py +943 -0
  1200. teradataml/utils/internal_buffer.py +122 -0
  1201. teradataml/utils/print_versions.py +206 -0
  1202. teradataml/utils/utils.py +451 -0
  1203. teradataml/utils/validators.py +3305 -0
  1204. teradataml-20.0.0.8.dist-info/METADATA +2804 -0
  1205. teradataml-20.0.0.8.dist-info/RECORD +1208 -0
  1206. teradataml-20.0.0.8.dist-info/WHEEL +5 -0
  1207. teradataml-20.0.0.8.dist-info/top_level.txt +1 -0
  1208. teradataml-20.0.0.8.dist-info/zip-safe +1 -0
@@ -0,0 +1,4814 @@
+ """
+ Copyright (c) 2024 by Teradata Corporation. All rights reserved.
+ TERADATA CORPORATION CONFIDENTIAL AND TRADE SECRET
+ 
+ Primary Owner: pradeep.garre@teradata.com
+ Secondary Owner: adithya.avvaru@teradata.com
+ 
+ This file implements the core framework that allows users to use the Teradata Enterprise Feature Store.
+ """
+ import os.path
+ import operator
+ import random
+ from functools import reduce
+ from sqlalchemy import literal_column
+ from teradataml.context.context import get_connection, _get_current_databasename
+ from teradataml.common.constants import SQLConstants, AccessQueries
+ from teradataml.common.exceptions import TeradataMlException
+ from teradataml.common.messages import Messages
+ from teradataml.common.messagecodes import MessageCodes
+ from teradataml.dataframe.sql import _SQLColumnExpression as Col
+ from teradataml.dbutils.dbutils import _create_database, _create_table, db_drop_table, execute_sql, Grant, Revoke, \
+     _update_data, _delete_data, db_transaction, db_list_tables, _insert_data, \
+     _is_trigger_exist, db_drop_view, _get_quoted_object_name
+ from teradataml.store.feature_store.constants import *
+ from teradataml.store.feature_store.mind_map import _TD_FS_MindMap_Template
+ from teradataml.store.feature_store.models import *
+ from teradataml.store.feature_store.constants import _FeatureStoreDFContainer
+ from teradataml.common.sqlbundle import SQLBundle
+ from teradataml.utils.validators import _Validators
+ from teradataml.store.feature_store.utils import _FSUtils
+ from teradataml.common.logger import TeradataMlLogger, get_td_logger
+ 
+ @TeradataMlLogger
+ class FeatureStore:
+     """Class for FeatureStore."""
+ 
+     def __init__(self,
+                  repo,
+                  data_domain=None,
+                  check=True):
+         """
+         DESCRIPTION:
+             Method to create a FeatureStore object in teradataml.
+             Note:
+                 * One should establish a connection to Vantage using create_context()
+                   before creating a FeatureStore object.
+ 
+         PARAMETERS:
+             repo:
+                 Required Argument.
+                 Specifies the repository name.
+                 Types: str
+ 
+             data_domain:
+                 Optional Argument.
+                 Specifies the data domain to which the FeatureStore points.
+                 Note:
+                     * If not specified, then the default database name is used as the data domain.
+                 Types: str
+ 
+             check:
+                 Optional Argument.
+                 Specifies whether to check the existence of the FeatureStore DB objects or not.
+                 When set to True, the method checks for the existence of FeatureStore DB objects.
+                 Otherwise, the method does not verify the existence of FeatureStore DB objects.
+                 Default Value: True
+                 Types: bool
+ 
+         RETURNS:
+             Object of FeatureStore.
+ 
+         RAISES:
+             None
+ 
+         EXAMPLES:
+             # Example 1: Create an instance of FeatureStore for repository 'vfs_v1'.
+             >>> from teradataml import FeatureStore
+             >>> fs = FeatureStore(repo='vfs_v1')
+             Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+ 
+             >>> fs.setup()
+             True
+ 
+             >>> fs
+             VantageFeatureStore(vfs_v1)-v2.0.0
+         """
+         argument_validation_params = []
+         argument_validation_params.append(["repo", repo, False, (str), True])
+ 
+         # Validate argument types.
+         _Validators._validate_function_arguments(argument_validation_params)
+ 
+         connection = get_connection()
+         if connection is None:
+             raise TeradataMlException(Messages.get_message(MessageCodes.CONNECTION_FAILURE),
+                                       MessageCodes.CONNECTION_FAILURE)
+         self._logger.info(f"Creating FeatureStore for repo: {repo}, data_domain: {data_domain}, check: {check}")
+ 
+         # Do not validate the existence of the repo here, as it requires a network call.
+         self.__repo = repo
+         self.__version = ""
+ 
+         # Declare SQLBundle to use it further.
+         self.__sql_bundle = SQLBundle()
+ 
+         # Store all the DFs here so there is no need to create them every time.
+         self.__df_container = {}
+         self._logger.debug("Initialized FeatureStore attributes and containers")
+ 
+         # Store the table names here, then use them wherever required.
+         self.__table_names = EFS_DB_COMPONENTS
+ 
+         # Declare getters for the corresponding DataFrames using _FeatureStoreDFContainer directly.
+         # Only keep the lambda functions that are actually used in the codebase.
+         self.__get_features_df = lambda: _FeatureStoreDFContainer.get_df("feature", self.__repo, self.__data_domain)
+         self.__get_features_wog_df = lambda: _FeatureStoreDFContainer.get_df("feature_wog", self.__repo, self.__data_domain)
+         self.__get_archived_features_df = lambda: _FeatureStoreDFContainer.get_df("feature_staging", self.__repo, self.__data_domain)
+         self.__get_feature_group_df = lambda: _FeatureStoreDFContainer.get_df("feature_group", self.__repo, self.__data_domain)
+         self.__get_archived_feature_group_df = lambda: _FeatureStoreDFContainer.get_df("feature_group_staging", self.__repo, self.__data_domain)
+         self.__get_entity_df = lambda: _FeatureStoreDFContainer.get_df("entity", self.__repo, self.__data_domain)
+         self.__get_archived_entity_df = lambda: _FeatureStoreDFContainer.get_df("entity_staging", self.__repo, self.__data_domain)
+         self.__get_data_source_df = lambda: _FeatureStoreDFContainer.get_df("data_source", self.__repo, self.__data_domain)
+         self.__get_archived_data_source_df = lambda: _FeatureStoreDFContainer.get_df("data_source_staging", self.__repo, self.__data_domain)
+         self.__get_dataset_catalog_df = lambda: _FeatureStoreDFContainer.get_df("dataset_catalog", self.__repo, self.__data_domain)
+         self.__get_data_domain_df = lambda: _FeatureStoreDFContainer.get_df("data_domain", self.__repo, self.__data_domain)
+         self.__get_feature_process_df = lambda: _FeatureStoreDFContainer.get_df("feature_process", self.__repo, self.__data_domain)
+         self.__get_features_metadata_df = lambda: _FeatureStoreDFContainer.get_df("feature_metadata", self.__repo, self.__data_domain)
+         self.__get_feature_info_df = lambda: _FeatureStoreDFContainer.get_df("feature_info", self.__repo, self.__data_domain)
+         self.__get_dataset_features_df = lambda: _FeatureStoreDFContainer.get_df("dataset_features", self.__repo, self.__data_domain)
+         self.__get_feature_runs_df = lambda: _FeatureStoreDFContainer.get_df("feature_runs", self.__repo, self.__data_domain)
+         self.__get_without_valid_period_df = lambda df: df.drop(columns=['ValidPeriod'])
+         self.__get_feature_version = lambda: _FeatureStoreDFContainer.get_df("feature_version", self.__repo, self.__data_domain)
+ 
+         self.__good_status = "Good"
+         self.__bad_status = "Bad"
+         self.__repaired_status = "Repaired"
+ 
+         self.__data_domain = data_domain if data_domain is not None else _get_current_databasename()
+         self._logger.debug(f"Set data domain to: {self.__data_domain}")
+ 
+         self.__repo_exists = connection.dialect._get_database_names(connection, self.__repo)
+         self._logger.debug(f"Repository exists check: {self.__repo_exists}")
+ 
+         if check:
+             self._logger.info("Checking existence of FeatureStore DB objects for repo: {}, data_domain: {}".format(repo, self.__data_domain))
+             # __validate_repo_exists() returns None, so this returns None from
+             # __init__ and skips the final log line below.
+             return self.__validate_repo_exists()
+         else:
+             # If check is False, do not check for the existence of DB objects.
+             self._logger.debug("Skipping repository validation and adding data domain")
+             self.__add_data_domain()
+ 
+         self._logger.info(f"FeatureStore created for repo: {repo}, data_domain: {data_domain}, check: {check}")
+ 
+     def __validate_repo_exists(self):
+         """
+         Validate the repository.
+ 
+         PARAMETERS:
+             None
+ 
+         RETURNS:
+             None
+ 
+         RAISES:
+             None
+         """
+         self._logger.debug(f"Validating repository '{self.__repo}' existence and completeness")
+ 
+         # Check whether the repo exists or not.
+         if not self.__repo_exists:
+             self._logger.info(f"Repository '{self.__repo}' does not exist")
+             print("Repo {} does not exist. Run FeatureStore.setup() "
+                   "to create the repo and setup FeatureStore.".format(self.__repo))
+             return
+ 
+         # Check whether all the EFS tables exist or not.
+         existing_tabs = db_list_tables(schema_name=self.__repo, object_name='_efs%')
+         if not existing_tabs.empty:
+             existing_tables = set(existing_tabs['TableName'].tolist())
+             all_tables_exist = all(val in existing_tables for val in EFS_TABLES.values())
+             self._logger.debug(f"Found {len(existing_tables)} EFS tables in repository, all required tables exist: {all_tables_exist}")
+         else:
+             all_tables_exist = False
+             self._logger.debug("No EFS tables found in repository")
+ 
+         # Check whether all the EFS triggers exist or not.
+         all_triggers_exist, num_trigger_exist = _is_trigger_exist(self.__repo, list(EFS_TRIGGERS.values()))
+         self._logger.debug(f"EFS triggers status: {num_trigger_exist} triggers exist, all required triggers exist: {all_triggers_exist}")
+ 
+         # Check whether all the EFS tables and triggers exist or not.
+         # If they exist, insert the data domain name into the _efs_data_domain table.
+         if all_tables_exist and all_triggers_exist:
+             self._logger.info("FeatureStore repository validation successful - all objects exist")
+             self.__add_data_domain()
+             # If all the tables and triggers are available, then
+             # FeatureStore is ready to use.
+             print("FeatureStore is ready to use.")
+         # Not all tables and triggers exist.
+         # If the count of both tables and triggers is 0, then
+         # FeatureStore is not set up.
+         elif num_trigger_exist == 0 and len(existing_tabs) == 0:
+             self._logger.info("FeatureStore is not setup - no objects found")
+             print("FeatureStore is not set up. Run FeatureStore.setup() to set up FeatureStore.")
+         else:
+             self._logger.info("FeatureStore repository validation failed - some objects missing")
+             print("Some of the feature store objects are missing. Run FeatureStore.repair() to create missing objects.")
+ 
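+     # The validation above boils down to set arithmetic over required vs.
+     # existing object names. A minimal sketch of that three-way outcome, with
+     # hypothetical sample names (not the actual EFS object lists):
+     #
+     #     required = {"_efs_feature", "_efs_entity", "_efs_data_source"}
+     #     existing = {"_efs_feature", "_efs_entity"}
+     #     missing = required - existing           # {'_efs_data_source'}
+     #     if not missing:
+     #         print("ready to use")
+     #     elif missing == required:
+     #         print("not set up")
+     #     else:
+     #         print(f"repair needed for: {sorted(missing)}")
+ 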
+     @property
+     def data_domain(self):
+         """
+         DESCRIPTION:
+             Get the data domain.
+ 
+         PARAMETERS:
+             None
+ 
+         RETURNS:
+             str
+ 
+         RAISES:
+             None
+ 
+         EXAMPLES:
+             # Example 1: Use existing FeatureStore 'vfs_v1' to get the data domain.
+             >>> from teradataml import FeatureStore
+             >>> fs = FeatureStore(repo='vfs_v1', data_domain='test_domain')
+             FeatureStore is ready to use.
+             >>> fs.data_domain
+             'test_domain'
+         """
+         self._logger.debug(f"Accessing data_domain property: {self.__data_domain}")
+         return self.__data_domain
+ 
+     @data_domain.setter
+     def data_domain(self, value):
+         """
+         DESCRIPTION:
+             Set the data domain.
+ 
+         PARAMETERS:
+             value:
+                 Required Argument.
+                 Specifies the data domain name.
+                 Types: str
+ 
+         RETURNS:
+             None
+ 
+         RAISES:
+             None
+ 
+         EXAMPLES:
+             # Example 1: Create or use existing FeatureStore for repository 'abc' and
+             #            then change the data domain to 'xyz'.
+             >>> from teradataml import FeatureStore
+             >>> fs = FeatureStore('abc')
+             FeatureStore is ready to use.
+ 
+             # Set the data domain to 'xyz'.
+             >>> fs.data_domain = 'xyz'
+ 
+             # Get the data domain.
+             >>> fs.data_domain
+             'xyz'
+         """
+         self._logger.info(f"Setting data domain from '{self.__data_domain}' to '{value}'")
+ 
+         argument_validation_params = []
+         argument_validation_params.append(["value", value, False, (str), True])
+ 
+         # Validate argument types.
+         _Validators._validate_function_arguments(argument_validation_params)
+ 
+         # Set the data domain value.
+         self.__data_domain = value
+         self.__add_data_domain()
+         self._logger.debug(f"Data domain successfully set to: {value}")
+ 
+     def __add_data_domain(self):
+         """
+         DESCRIPTION:
+             Internal method to add the data domain.
+ 
+         PARAMETERS:
+             None
+ 
+         RETURNS:
+             None
+ 
+         RAISES:
+             None
+ 
+         EXAMPLES:
+             >>> self.__add_data_domain()
+         """
+         self._logger.debug(f"Adding data domain '{self.__data_domain}' to EFS metadata")
+ 
+         # Add the data domain to the EFS_DATA_DOMAINS table. Error 2801
+         # (duplicate unique primary key) is ignored, so re-adding an existing
+         # domain is a no-op.
+         _insert_data(table_name=self.__table_names['data_domain'],
+                      schema_name=self.__repo,
+                      values=(self.__data_domain, dt.utcnow()),
+                      columns=["name", "created_time"],
+                      ignore_errors=[2801])
+ 
+         self._logger.debug(f"Data domain '{self.__data_domain}' added to metadata table")
+ 
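+     # ignore_errors=[2801] above makes the metadata insert idempotent: 2801 is
+     # Teradata's duplicate unique primary key error. A generic sketch of the
+     # same pattern, using a hypothetical conn_execute() helper (not part of
+     # this module):
+     #
+     #     def insert_ignoring(conn_execute, sql, params, ignorable=(2801,)):
+     #         try:
+     #             conn_execute(sql, params)
+     #         except Exception as err:
+     #             code = getattr(err, "code", None)
+     #             if code not in ignorable:   # swallow only expected duplicates
+     #                 raise
+ 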
+     @property
+     def repo(self):
+         """
+         DESCRIPTION:
+             Get the repository.
+ 
+         PARAMETERS:
+             None
+ 
+         RETURNS:
+             str
+ 
+         RAISES:
+             None
+ 
+         EXAMPLES:
+             # Example 1: Get the repository name from FeatureStore.
+             >>> from teradataml import FeatureStore
+             >>> fs = FeatureStore('vfs_v1')
+             Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+ 
+             # Get the repository name.
+             >>> fs.repo
+             'vfs_v1'
+         """
+         self._logger.debug(f"Accessing repo property: {self.__repo}")
+         return self.__repo
+ 
+     @repo.setter
+     def repo(self, value):
+         """
+         DESCRIPTION:
+             Set the repository.
+ 
+         PARAMETERS:
+             value:
+                 Required Argument.
+                 Specifies the repository name.
+                 Types: str
+ 
+         RETURNS:
+             None
+ 
+         RAISES:
+             None
+ 
+         EXAMPLES:
+             # Example 1: Create a FeatureStore for repository 'abc' and
+             #            then change the repository to 'xyz'.
+             >>> from teradataml import FeatureStore
+             >>> fs = FeatureStore('abc')
+             FeatureStore is ready to use.
+ 
+             # Get the repository name.
+             >>> fs.repo
+             'abc'
+ 
+             # Set the repository to 'xyz'.
+             >>> fs.repo = 'xyz'
+             >>> fs.repo
+             'xyz'
+         """
+         self._logger.info(f"Setting repository from '{self.__repo}' to '{value}'")
+ 
+         argument_validation_params = []
+         argument_validation_params.append(["value", value, False, (str), True])
+ 
+         # Validate argument types.
+         _Validators._validate_function_arguments(argument_validation_params)
+ 
+         self.__repo_exists = get_connection().dialect._get_database_names(get_connection(),
+                                                                           value)
+         self._logger.debug(f"Repository '{value}' exists: {self.__repo_exists}")
+ 
+         # Set the repo value before validating, so that validation checks and
+         # messages reference the new repo.
+         self.__repo = value
+ 
+         self.__validate_repo_exists()
+ 
+         # Remove all entries from the container so subsequent APIs
+         # automatically point to the new repo.
+         self.__df_container.clear()
+         self._logger.debug("Cleared DataFrame container cache")
+ 
+         self.__version = None
+         self._logger.debug(f"Repository successfully set to: {value}")
+ 
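+     # Switching repositories invalidates every cached per-repo artifact
+     # (the DataFrame container, the version string). A minimal sketch of this
+     # invalidate-on-reconfigure pattern, with hypothetical names:
+     #
+     #     class Store:
+     #         def __init__(self, repo):
+     #             self._repo, self._cache, self._version = repo, {}, None
+     #
+     #         def set_repo(self, repo):
+     #             self._repo = repo
+     #             self._cache.clear()     # cached objects belong to the old repo
+     #             self._version = None    # lazily re-read on next access
+ 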
+     def __repr__(self):
+         """
+         DESCRIPTION:
+             String representation for FeatureStore object.
+ 
+         PARAMETERS:
+             None
+ 
+         RETURNS:
+             str
+ 
+         RAISES:
+             None
+ 
+         EXAMPLES:
+             >>> from teradataml import FeatureStore
+             >>> fs = FeatureStore('vfs_v1')
+             Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+ 
+             # Setup FeatureStore for this repository.
+             >>> fs.setup()
+ 
+             # Get the string representation of FeatureStore.
+             >>> fs
+             VantageFeatureStore(vfs_v1)-v2.0.0
+         """
+         s = "VantageFeatureStore({})".format(self.__repo)
+         try:
+             version = "-v{}".format(self.__get_version())
+             self._logger.debug(f"Retrieved FeatureStore version: {version}")
+         except Exception as e:
+             self._logger.debug(f"Could not retrieve FeatureStore version: {e}")
+             version = ""
+ 
+         result = "{}{}".format(s, version)
+         self._logger.debug(f"Generated FeatureStore string representation: {result}")
+         return result
+ 
+     def __get_version(self):
+         """
+         DESCRIPTION:
+             Internal method to get the FeatureStore version.
+ 
+         PARAMETERS:
+             None
+ 
+         RETURNS:
+             str
+ 
+         RAISES:
+             None
+         """
+         if not self.__version:
+             self._logger.debug(f"Retrieving FeatureStore version from {self.__repo}.{self.__table_names['version']}")
+             sql = "SELECT version FROM {}.{}".format(self.__repo, self.__table_names['version'])
+             self.__version = next(execute_sql(sql))[0]
+             self._logger.debug(f"Retrieved version: {self.__version}")
+         return self.__version
+ 
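+     # __get_version memoizes a single-row, single-column lookup: execute_sql
+     # returns an iterable cursor, next(...) pulls the first row, and [0] takes
+     # its first column. A hedged sketch of reading one scalar the same way,
+     # assuming an open teradataml connection:
+     #
+     #     row = next(execute_sql("SELECT SESSION"))  # first row of the cursor
+     #     value = row[0]                             # first column -> scalar
+ 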
+     @staticmethod
+     def list_repos() -> DataFrame:
+         """
+         DESCRIPTION:
+             Function to list the repositories.
+ 
+         PARAMETERS:
+             None
+ 
+         RETURNS:
+             teradataml DataFrame
+ 
+         RAISES:
+             None
+ 
+         EXAMPLES:
+             >>> from teradataml import FeatureStore
+             # Example 1: List all the FeatureStore repositories using the FeatureStore class.
+             >>> FeatureStore.list_repos()
+                 repos
+             0  vfs_v1
+ 
+             # Example 2: List all the FeatureStore repositories using a FeatureStore object.
+             >>> fs = FeatureStore('vfs_v1')
+             FeatureStore is ready to use.
+ 
+             >>> fs.list_repos()
+                 repos
+             0  vfs_v1
+         """
+         get_td_logger().info("Listing all the FeatureStore repositories.")
+         df = DataFrame.from_query("select distinct DataBaseName as repos from dbc.tablesV where TableName='{}'".format(
+             EFS_DB_COMPONENTS['version']))
+ 
+         get_td_logger().debug(f"FeatureStore repositories listed:\n{df}")
+         return df
+ 
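+     # list_repos() discovers repositories by probing the data dictionary: any
+     # database containing the EFS version table is treated as a repo. The
+     # generated SQL looks roughly like the sketch below; the literal table
+     # name shown is an assumption based on the '_efs' naming used elsewhere
+     # in this file:
+     #
+     #     SELECT DISTINCT DataBaseName AS repos
+     #     FROM dbc.tablesV
+     #     WHERE TableName = '_efs_version'
+ 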
+     def setup(self, perm_size='10e9', spool_size='10e8'):
+         """
+         DESCRIPTION:
+             Function to setup all the required objects in Vantage for the specified
+             repository.
+             Note:
+                 The function checks whether the repository exists or not. If it does
+                 not exist, it first creates the repository and then creates the
+                 corresponding tables. Hence, make sure the user connected to Vantage
+                 has the access rights required for creating a database and for
+                 creating tables in the corresponding database.
+ 
+         PARAMETERS:
+             perm_size:
+                 Optional Argument.
+                 Specifies the number of bytes to allocate to FeatureStore "repo"
+                 for permanent space.
+                 Note:
+                     Exponential notation can also be used.
+                 Default Value: 10e9
+                 Types: str or int
+ 
+             spool_size:
+                 Optional Argument.
+                 Specifies the number of bytes to allocate to FeatureStore "repo"
+                 for spool space.
+                 Note:
+                     Exponential notation can also be used.
+                 Default Value: 10e8
+                 Types: str or int
+ 
+         RETURNS:
+             bool
+ 
+         RAISES:
+             TeradataMlException
+ 
+         EXAMPLES:
+             # Example 1: Setup FeatureStore for repository 'vfs_v1'.
+             >>> from teradataml import FeatureStore
+             # Create FeatureStore for repo 'vfs_v1'.
+             >>> fs = FeatureStore("vfs_v1")
+             Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+ 
+             # Setup FeatureStore for this repository.
+             >>> fs.setup()
+             True
+ 
+             >>> fs
+             VantageFeatureStore(vfs_v1)-v2.0.0
+ 
+             # Example 2: Setup FeatureStore for repository 'vfs_v2' with custom perm_size and spool_size.
+             # Create FeatureStore for repo 'vfs_v2'.
+             >>> fs = FeatureStore("vfs_v2")
+             Repo vfs_v2 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+ 
+             # Setup FeatureStore for this repository.
+             >>> fs.setup(perm_size='50e6', spool_size='50e6')
+             True
+ 
+             >>> fs
+             VantageFeatureStore(vfs_v2)-v2.0.0
+         """
+         self._logger.info(f"Setting up FeatureStore for repository '{self.__repo}' with perm_size: {perm_size}, spool_size: {spool_size}")
+ 
+         # If the repo does not exist, then create it.
+         if not self.__repo_exists:
+             self._logger.info(f"Creating database '{self.__repo}' as it does not exist")
+             _create_database(self.__repo, perm_size, spool_size)
+ 
+         # Check whether the version table exists or not. If it exists, assume all
+         # tables are available.
+         all_tables_exist = get_connection().dialect.has_table(
+             get_connection(), self.__table_names['version'], schema=self.__repo)
+         self._logger.debug(f"Version table exists: {all_tables_exist}")
+ 
+         if not all_tables_exist:
+             self._logger.info("Creating FeatureStore database objects (tables, triggers, views)")
+             # Create the object tables.
+             for table_spec, table_name in EFS_TABLES.items():
+                 self._logger.debug(f"Creating table: {table_name}")
+                 execute_sql(table_spec.format(self.__repo, table_name))
+             self._logger.debug("All required tables created successfully")
+             # Create the triggers.
+             for trigger_spec, trg_name in EFS_TRIGGERS.items():
+                 self._logger.debug(f"Creating trigger: {trg_name}")
+                 alter_name = trg_name.split('_trg')[0]
+                 insert_name = self.__repo + '.' + alter_name + '_staging'
+                 execute_sql(trigger_spec.format(self.__repo, trg_name,
+                                                 alter_name, insert_name))
+             self._logger.debug("All required triggers created successfully")
+ 
+             # Create the feature versions view.
+             self._logger.debug("Creating feature versions view")
+             sql = EFS_FEATURE_VERSION.format(self.__repo,
+                                              EFS_DB_COMPONENTS['feature_version'],
+                                              self.__repo,
+                                              self.__table_names['feature_process'])
+             execute_sql(sql)
+             self._logger.debug("Feature versions view created successfully")
+ 
+             # After the setup is done, populate the version.
+             self._logger.debug("Populating version table")
+             insert_model = "insert into {}.{} values (?, ?);".format(self.__repo, self.__table_names['version'])
+             execute_sql(insert_model, (EFS_VERSION_, datetime.datetime.now()))
+             self._logger.debug("Version table populated successfully")
+ 
+             # Create the data domain in the _efs_data_domain table.
+             self.__add_data_domain()
+             self._logger.debug("FeatureStore setup process completed successfully")
+ 
+         if self.__repo_exists and all_tables_exist:
+             self._logger.info(f"FeatureStore is already setup for repository '{self.__repo}'")
+             print("EFS is already setup for the repo {}.".format(self.__repo))
+ 
+         # Set repo_exists to True.
+         self.__repo_exists = True
+         self._logger.info(f"FeatureStore setup completed successfully for repository '{self.__repo}'")
+         return True
+ 
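+     # A minimal end-to-end sketch of first-time setup, assuming a reachable
+     # Vantage system, credentials with CREATE DATABASE rights, and the
+     # hypothetical repo name 'vfs_demo':
+     #
+     #     >>> from teradataml import create_context, FeatureStore
+     #     >>> create_context(host="<host>", username="<user>", password="<password>")
+     #     >>> fs = FeatureStore("vfs_demo")
+     #     >>> fs.setup(perm_size='10e6', spool_size='10e6')   # small demo sizes
+     #     True
+ 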
+     @property
+     def grant(self):
+         """
+         DESCRIPTION:
+             Grants access on FeatureStore.
+             Note:
+                 One must have admin access to grant access.
+ 
+         PARAMETERS:
+             None
+ 
+         RETURNS:
+             Object of Grant. Its read(), write() and read_write() methods return bool.
+ 
+         RAISES:
+             OperationalError
+ 
+         EXAMPLES:
+             >>> from teradataml import FeatureStore
+             # Create FeatureStore for repo 'vfs_v2'.
+             >>> fs = FeatureStore("vfs_v2")
+             Repo vfs_v2 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+ 
+             # Setup FeatureStore for this repository.
+             >>> fs.setup()
+             True
+ 
+             # Example 1: Grant read access on FeatureStore to user 'BoB'.
+             >>> fs.grant.read('BoB')
+             True
+ 
+             # Example 2: Grant write access on FeatureStore to user 'BoB'.
+             >>> fs.grant.write('BoB')
+             True
+ 
+             # Example 3: Grant read and write access on FeatureStore to user 'BoB'.
+             >>> fs.grant.read_write('BoB')
+             True
+         """
+         self._logger.info(f"Granting access for repository: {self.__repo}")
+         return Grant(objects=AccessQueries,
+                      database=self.__repo)
+ 
+     @property
+     def revoke(self):
+         """
+         DESCRIPTION:
+             Revokes access on FeatureStore.
+             Note:
+                 One must have admin access to revoke access.
+ 
+         PARAMETERS:
+             None
+ 
+         RETURNS:
+             Object of Revoke. Its read(), write() and read_write() methods return bool.
+ 
+         RAISES:
+             OperationalError
+ 
+         EXAMPLES:
+             >>> from teradataml import FeatureStore
+             # Create FeatureStore for repo 'vfs_v1'.
+             >>> fs = FeatureStore("vfs_v1")
+             Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+ 
+             # Setup FeatureStore for this repository.
+             >>> fs.setup()
+             True
+ 
+             # Example 1: Revoke read access on FeatureStore from user 'BoB'.
+             >>> fs.revoke.read('BoB')
+             True
+ 
+             # Example 2: Revoke write access on FeatureStore from user 'BoB'.
+             >>> fs.revoke.write('BoB')
+             True
+ 
+             # Example 3: Revoke read and write access on FeatureStore from user 'BoB'.
+             >>> fs.revoke.read_write('BoB')
+             True
+         """
+         self._logger.info(f"Revoking access for repository: {self.__repo}")
+         return Revoke(objects=AccessQueries,
+                       database=self.__repo)
+ 
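+     # grant and revoke return helper objects bound to the repo database, so
+     # permission changes read as fluent calls. A sketch, assuming users 'BoB'
+     # and 'Alice' exist and the caller has admin rights:
+     #
+     #     >>> fs.grant.read('BoB')          # read-only access
+     #     True
+     #     >>> fs.grant.read_write('Alice')
+     #     True
+     #     >>> fs.revoke.write('Alice')      # Alice keeps read access
+     #     True
+ 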
+     def repair(self):
+         """
+         DESCRIPTION:
+             Repairs the existing repo.
+             Notes:
+                 * The method checks for the corresponding missing database objects which
+                   are required for FeatureStore. If any of the database objects is not
+                   available, then it tries to create the object.
+                 * The method repairs only the underlying tables and not the data inside
+                   the corresponding tables.
+ 
+         PARAMETERS:
+             None
+ 
+         RETURNS:
+             bool
+ 
+         RAISES:
+             None
+ 
+         EXAMPLES:
+             # Example 1: Repair FeatureStore repo 'vfs_v1'.
+             # Create FeatureStore for repo 'vfs_v1'.
+             >>> from teradataml import FeatureStore
+             >>> fs = FeatureStore("vfs_v1")
+             Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+ 
+             # Setup FeatureStore for this repository.
+             >>> fs.setup()
+ 
+             # Drop the data_source_staging table to simulate the missing object.
+             >>> from teradataml import db_drop_table
+             >>> db_drop_table(schema_name='vfs_v1', table_name=EFS_DB_COMPONENTS['data_source_staging'])
+ 
+             # Verify the missing object by creating FeatureStore again.
+             >>> fs = FeatureStore("vfs_v1")
+             Some of the feature store objects are missing. Run FeatureStore.repair() to create missing objects.
+ 
+             >>> fs.repair()
+             Successfully repaired the following objects: _efs_data_source_staging
+             True
+         """
+         self._logger.info(f"Starting repair process for FeatureStore repository '{self.__repo}'")
+ 
+         # Check whether the repo exists or not.
+         if not self.__repo_exists:
+             self._logger.debug(f"Repository '{self.__repo}' does not exist - cannot repair")
+             print("Repo '{}' does not exist. Run FeatureStore.setup() "
+                   "to create the repo and setup FeatureStore.".format(self.__repo))
+             return False
+ 
+         # Get all existing EFS tables in the repo.
+         existing_tabs = db_list_tables(schema_name=self.__repo, object_name='_efs%')
+         existing_tables = set(existing_tabs['TableName'].tolist())
+         self._logger.debug(f"Found {len(existing_tables)} existing EFS tables: {existing_tables}")
+ 
+         # Get non-existing tables in the order of EFS_TABLES.values().
+         non_existing_tables = {
+             table_spec: table_name
+             for table_spec, table_name in EFS_TABLES.items()
+             if table_name not in existing_tables
+         }
+         self._logger.debug(f"Missing tables: {list(non_existing_tables.values())}")
+ 
+         # Get all existing EFS triggers in the repo.
+         sql = SQLBundle()._get_sql_query(SQLConstants.SQL_LIST_TRIGGERS).format(self.__repo, '_efs%')
+         existing_triggers = {row[0] for row in execute_sql(sql).fetchall()}
+         self._logger.debug(f"Found {len(existing_triggers)} existing EFS triggers: {existing_triggers}")
+ 
+         # Get non-existing triggers in the order of EFS_TRIGGERS.values().
+         non_existing_triggers = {
+             trigger_spec: trigger_name
+             for trigger_spec, trigger_name in EFS_TRIGGERS.items()
+             if trigger_name not in existing_triggers
+         }
+         self._logger.debug(f"Missing triggers: {list(non_existing_triggers.values())}")
+ 
+         # Check if the feature_version view exists (it shows up in existing_tables
+         # from db_list_tables).
+         feature_version_exists = self.__table_names['feature_version'] in existing_tables
+         self._logger.debug(f"Feature version view exists: {feature_version_exists}")
+ 
+         # Return False only if all tables, triggers, and views exist.
+         if not non_existing_tables and not non_existing_triggers and feature_version_exists:
+             self._logger.info(f"Repository '{self.__repo}' is complete and does not need repair")
+             print("Repo '{}' is ready to use and does not need any repair.".format(self.__repo))
+             return False
+ 
+         failed_creation = []
+         created = []
+         # Iterate over EFS_TABLES based on the non-existing tables.
+         for table_spec, table_name in non_existing_tables.items():
+             try:
+                 self._logger.debug(f"Creating missing table: {table_name}")
+                 execute_sql(table_spec.format(self.__repo, table_name))
+                 created.append(table_name)
+             except Exception as e:
+                 # If any table creation fails, add it to the failed list.
+                 self._logger.debug(f"Failed to create table '{table_name}': {e}")
+                 failed_creation.append((f"Table '{table_name}'", str(e)))
+ 
+         # Iterate over EFS_TRIGGERS based on the non-existing triggers.
+         for trigger_spec, trigger_name in non_existing_triggers.items():
+             alter_name = trigger_name.split('_trg')[0]
+             insert_name = self.__repo + '.' + alter_name + '_staging'
+             try:
+                 self._logger.debug(f"Creating missing trigger: {trigger_name}")
+                 execute_sql(trigger_spec.format(self.__repo, trigger_name,
+                                                 alter_name, insert_name))
+                 created.append(trigger_name)
+             except Exception as e:
+                 # If any trigger creation fails, add it to the failed list.
+                 self._logger.debug(f"Failed to create trigger '{trigger_name}': {e}")
+                 failed_creation.append((f"Trigger '{trigger_name}'", str(e)))
+ 
+         # Create the feature versions view if it doesn't exist.
+         if not feature_version_exists:
+             try:
+                 self._logger.debug("Creating missing feature versions view")
+                 sql = EFS_FEATURE_VERSION.format(self.__repo,
+                                                  EFS_DB_COMPONENTS['feature_version'],
+                                                  self.__repo,
+                                                  self.__table_names['feature_process'])
+                 execute_sql(sql)
+                 created.append(EFS_DB_COMPONENTS['feature_version'])
+             except Exception as e:
+                 self._logger.debug(f"Failed to create feature versions view: {e}")
+                 failed_creation.append((f"View '{EFS_DB_COMPONENTS['feature_version']}'", str(e)))
+ 
+         # If any of the table or trigger creations failed, return False.
+         if failed_creation:
+             self._logger.debug(f"Repair completed with failures: {len(failed_creation)} objects could not be created")
+             print("The following objects could not be repaired:")
+             for obj, reason in failed_creation:
+                 print(f"  - {obj}: {reason}")
+             return False
+ 
+         self._logger.info(f"Repair completed successfully: created {len(created)} objects: {created}")
+         print("Successfully repaired the following objects: {}".format(", ".join(created)))
+         return True
+ 
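+     # repair() is a detect-then-recreate loop: diff the required object names
+     # against the data dictionary, replay the DDL for whatever is missing, and
+     # report partial failures instead of aborting. A compact sketch with a
+     # hypothetical ddl_for() helper (not part of this module):
+     #
+     #     created, failed = [], []
+     #     for name in sorted(required - existing):
+     #         try:
+     #             execute_sql(ddl_for(name))   # replay the object's DDL
+     #             created.append(name)
+     #         except Exception as err:
+     #             failed.append((name, str(err)))
+ 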
+     def list_features(self, archived=False) -> DataFrame:
+         """
+         DESCRIPTION:
+             List all the features.
+ 
+         PARAMETERS:
+             archived:
+                 Optional Argument.
+                 Specifies whether to list effective features or archived features.
+                 When set to False, effective features in FeatureStore are listed,
+                 otherwise, archived features are listed.
+                 Default Value: False
+                 Types: bool
+ 
+         RETURNS:
+             teradataml DataFrame
+ 
+         RAISES:
+             None
+ 
+         EXAMPLES:
+             >>> from teradataml import DataFrame, FeatureStore, load_example_data
+             # Create teradataml DataFrame.
+             >>> load_example_data("dataframe", "sales")
+             >>> df = DataFrame("sales")
+ 
+             # Create FeatureStore for repo 'vfs_v1'.
+             >>> fs = FeatureStore("vfs_v1")
+             Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+             # Setup FeatureStore for this repository.
+             >>> fs.setup()
+             True
+ 
+             # Create a FeatureGroup from teradataml DataFrame.
+             >>> fg = FeatureGroup.from_DataFrame(name='sales',
+             ...                                  entity_columns='accounts',
+             ...                                  df=df,
+             ...                                  timestamp_column='datetime')
+             # Apply the FeatureGroup to FeatureStore.
+             >>> fs.apply(fg)
+             True
+ 
+             # Example 1: List all the effective Features in the repo 'vfs_v1'.
+             >>> fs.list_features()
+                               id column_name description  tags data_type feature_type  status               creation_time modified_time group_name
+             name data_domain
+             Apr  ALICE         4         Apr        None  None    BIGINT   CONTINUOUS  ACTIVE  2025-07-28 03:17:31.262501          None      sales
+             Jan  ALICE         2         Jan        None  None    BIGINT   CONTINUOUS  ACTIVE  2025-07-28 03:17:30.056273          None      sales
+             Mar  ALICE         3         Mar        None  None    BIGINT   CONTINUOUS  ACTIVE  2025-07-28 03:17:30.678060          None      sales
+             Feb  ALICE         1         Feb        None  None     FLOAT   CONTINUOUS  ACTIVE  2025-07-28 03:17:29.403242          None      sales
+ 
+             # Example 2: List all the archived Features in the repo 'vfs_v1'.
+             # Note: A Feature can only be archived when it is not associated with any Group.
+             # Let's remove Feature 'Feb' from the FeatureGroup.
+             >>> fg.remove_feature(fs.get_feature('Feb'))
+             True
+ 
+             # Apply the modified FeatureGroup to FeatureStore.
+             >>> fs.apply(fg)
+             True
+ 
+             # Archive Feature 'Feb'.
+             >>> fs.archive_feature('Feb')
+             Feature 'Feb' is archived.
+             True
+ 
+             # List all the archived Features in the repo 'vfs_v1'.
+             >>> fs.list_features(archived=True)
+                id name data_domain column_name description  tags data_type feature_type  status               creation_time modified_time               archived_time group_name
+             0   1  Feb       ALICE         Feb        None  None     FLOAT   CONTINUOUS  ACTIVE  2025-07-28 03:17:29.403242          None  2025-07-28 03:19:58.950000      sales
+             >>>
+         """
+         self._logger.info(f"Listing features from repository '{self.__repo}', data_domain '{self.__data_domain}', archived: {archived}")
+         result = self.__get_archived_features_df() if archived else self.__get_features_df()
+         self._logger.debug(f"Retrieved features:\n{result}")
+         return result
+ 
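+     # Each catalog listing appears to pair a live table with a '_staging'
+     # twin: archiving moves rows into the staging table (the '_trg' triggers
+     # created in setup() insert into '*_staging' objects), and archived=True
+     # simply reads from that twin. Usage sketch, assuming the 'sales'
+     # FeatureGroup from the docstring above has been applied:
+     #
+     #     >>> fs.list_features()               # effective features
+     #     >>> fs.list_features(archived=True)  # rows moved by archive_feature()
+ 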
+     def list_entities(self, archived=False) -> DataFrame:
+         """
+         DESCRIPTION:
+             List all the entities.
+ 
+         PARAMETERS:
+             archived:
+                 Optional Argument.
+                 Specifies whether to list effective entities or archived entities.
+                 When set to False, effective entities in FeatureStore are listed,
+                 otherwise, archived entities are listed.
+                 Default Value: False
+                 Types: bool
+ 
+         RETURNS:
+             teradataml DataFrame
+ 
+         RAISES:
+             None
+ 
+         EXAMPLES:
+             >>> from teradataml import DataFrame, FeatureStore, load_example_data
+             # Create FeatureStore for repo 'vfs_v1'.
+             >>> fs = FeatureStore("vfs_v1")
+             Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+             # Setup FeatureStore for this repository.
+             >>> fs.setup()
+             True
+ 
+             # Create teradataml DataFrame.
+             >>> load_example_data("dataframe", "sales")
+             >>> df = DataFrame("sales")
+ 
+             # Create a FeatureGroup from teradataml DataFrame.
+             >>> fg = FeatureGroup.from_DataFrame(name='sales',
+             ...                                  entity_columns='accounts',
+             ...                                  df=df,
+             ...                                  timestamp_column='datetime')
+             # Apply the FeatureGroup to FeatureStore.
+             >>> fs.apply(fg)
+             True
+ 
+             # Example 1: List all the effective Entities in the repo 'vfs_v1'.
+             >>> fs.list_entities()
+                               description               creation_time               modified_time entity_column
+             name  data_domain
+             sales ALICE              None  2025-07-28 03:17:31.558796  2025-07-28 03:19:41.233953      accounts
+             >>>
+ 
+             # Example 2: List all the archived Entities in the repo 'vfs_v1'.
+             # Note: An Entity cannot be archived if it is a part of a FeatureGroup.
+             # First create another Entity, and update the FeatureGroup with the
+             # other Entity. Then archive Entity 'sales'.
+             >>> entity = Entity('store_sales', columns=df.accounts)
+             # Apply the new entity to the FeatureGroup.
+             >>> fg.apply(entity)
+             True
+ 
+             # Update the FeatureGroup in FeatureStore. This updates the Entity
+             # from 'sales' to 'store_sales' for FeatureGroup 'sales'.
+             >>> fs.apply(fg)
+             True
+ 
+             # Let's archive Entity 'sales' since it is not part of any FeatureGroup.
+             >>> fs.archive_entity('sales')
+             Entity 'sales' is archived.
+             True
+             >>>
+ 
+             # List the archived entities.
+             >>> fs.list_entities(archived=True)
+                                     description               creation_time modified_time entity_column
+             name        data_domain
+             store_sales ALICE              None  2025-07-28 03:23:40.322424          None      accounts
+             >>>
+         """
+         self._logger.info(f"Listing entities from repository '{self.__repo}', data_domain '{self.__data_domain}', archived: {archived}")
+         result = self.__get_archived_entity_df() if archived else self.__get_entity_df()
+         self._logger.debug(f"Retrieved entities:\n{result}")
+         return result
+ 
+     def list_data_sources(self, archived=False) -> DataFrame:
+         """
+         DESCRIPTION:
+             List all the Data Sources.
+ 
+         PARAMETERS:
+             archived:
+                 Optional Argument.
+                 Specifies whether to list effective data sources or archived data sources.
+                 When set to False, effective data sources in FeatureStore are listed,
+                 otherwise, archived data sources are listed.
+                 Default Value: False
+                 Types: bool
+ 
+         RETURNS:
+             teradataml DataFrame
+ 
+         RAISES:
+             None
+ 
+         EXAMPLES:
+             >>> from teradataml import DataSource, FeatureStore, load_example_data
+             # Create teradataml DataFrame.
+             >>> load_example_data("dataframe", "admissions_train")
+             >>> admissions = DataFrame("admissions_train")
+ 
+             # Create FeatureStore for repo 'vfs_v1'.
+             >>> fs = FeatureStore("vfs_v1")
+             Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+             # Setup FeatureStore for this repository.
+             >>> fs.setup()
+             True
+ 
+             # Create DataSource using teradataml DataFrame.
+             >>> ds = DataSource(name='admissions', source=admissions)
+             # Apply the DataSource to FeatureStore.
+             >>> fs.apply(ds)
+             True
+ 
+             # Example 1: List all the effective DataSources in the repo 'vfs_v1'.
+             >>> fs.list_data_sources()
+                                     description timestamp_column                             source               creation_time modified_time
+             name       data_domain
+             admissions ALICE              None             None  select * from "admissions_train"  2025-07-28 03:26:53.507807          None
+ 
+             # Example 2: List all the archived DataSources in the repo 'vfs_v1'.
+             # Let's first archive the DataSource.
+             >>> fs.archive_data_source('admissions')
+             DataSource 'admissions' is archived.
+             True
+ 
+             # List archived DataSources.
+             >>> fs.list_data_sources(archived=True)
+                      name data_domain description timestamp_column                             source               creation_time modified_time               archived_time
+             0  admissions       ALICE        None             None  select * from "admissions_train"  2025-07-28 03:26:53.507807          None  2025-07-28 03:28:17.160000
+             >>>
+         """
+         self._logger.info(f"Listing data sources from repository '{self.__repo}', data_domain '{self.__data_domain}', archived: {archived}")
+         result = self.__get_archived_data_source_df() if archived else self.__get_data_source_df()
+         self._logger.debug(f"Retrieved data sources:\n{result}")
+         return result
+ 
+     def list_feature_groups(self, archived=False) -> DataFrame:
+         """
+         DESCRIPTION:
+             List all the FeatureGroups.
+ 
+         PARAMETERS:
+             archived:
+                 Optional Argument.
+                 Specifies whether to list effective feature groups or archived feature groups.
+                 When set to False, effective feature groups in FeatureStore are listed,
+                 otherwise, archived feature groups are listed.
+                 Default Value: False
+                 Types: bool
+ 
+         RETURNS:
+             teradataml DataFrame
+ 
+         RAISES:
+             None
+ 
+         EXAMPLES:
+             >>> from teradataml import FeatureGroup, FeatureStore, load_example_data
+             # Create teradataml DataFrame.
+             >>> load_example_data("dataframe", "admissions_train")
+             >>> admissions = DataFrame("admissions_train")
+ 
+             # Create FeatureStore for repo 'vfs_v1'.
+             >>> fs = FeatureStore("vfs_v1")
+             Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+             # Setup FeatureStore for this repository.
+             >>> fs.setup()
+             True
+ 
+             # Create a FeatureGroup from DataFrame.
+             >>> fg = FeatureGroup.from_DataFrame("admissions", df=admissions, entity_columns='id')
+             # Apply FeatureGroup to FeatureStore.
+             >>> fs.apply(fg)
+             True
+ 
+             # Example 1: List all the effective FeatureGroups in the repo 'vfs_v1'.
+             >>> fs.list_feature_groups()
+                                     description data_source_name entity_name               creation_time modified_time
+             name       data_domain
+             admissions ALICE              None       admissions  admissions  2025-07-28 03:30:04.115331          None
+ 
+             # Example 2: List all the archived FeatureGroups in the repo 'vfs_v1'.
+             # Let's first archive the FeatureGroup.
+             >>> fs.archive_feature_group("admissions")
+             True
+ 
+             # List archived FeatureGroups.
+             >>> fs.list_feature_groups(archived=True)
+                      name data_domain description data_source_name entity_name               creation_time modified_time               archived_time
+             0  admissions       ALICE        None       admissions  admissions  2025-07-28 03:30:04.115331          None  2025-07-28 03:31:04.550000
+             >>>
+         """
+         self._logger.info(f"Listing feature groups from repository '{self.__repo}', data_domain '{self.__data_domain}', archived: {archived}")
+         result = self.__get_archived_feature_group_df() if archived else self.__get_feature_group_df()
+         self._logger.debug(f"Retrieved feature groups:\n{result}")
+         return result
+ 
+ def list_data_domains(self) -> DataFrame:
1126
+ """
1127
+ DESCRIPTION:
1128
+ Lists all the data domains.
1129
+
1130
+ PARAMETERS:
1131
+ None
1132
+
1133
+ RETURNS:
1134
+ teradataml DataFrame
1135
+
1136
+ RAISES:
1137
+ None
1138
+
1139
+ EXAMPLES:
1140
+ # Example 1: List all the data domains in the repo 'vfs_v1'.
1141
+ >>> from teradataml import FeatureStore
1142
+ # Create FeatureStore for repo 'vfs_v1' with data_domain 'd1'.
1143
+ >>> fs = FeatureStore("vfs_v1", data_domain='d1')
1144
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
1145
+
1146
+ # List all the data domains in the repo 'vfs_v1'.
1147
+ >>> fs.list_data_domains()
1148
+ name created_time
1149
+ 0 d1 2025-04-30 11:21:40.123456
1150
+ """
1151
+ self._logger.info("Listing all the data domains in the repo '{}'.".format(self.__repo))
1152
+ res = self.__get_data_domain_df()
1153
+
1154
+ self._logger.debug("Data domains listed: \n{}".format(res))
1155
+ return res
1156
+
1157
+ def list_feature_processes(self, archived=False) -> DataFrame:
1158
+ """
1159
+ DESCRIPTION:
1160
+ Lists all the feature processes.
1161
+
1162
+ PARAMETERS:
1163
+ archived:
1164
+ Optional Argument.
1165
+ Specifies whether to retrieve archived feature processes or not.
1166
+ When set to True, archived feature processes in FeatureStore are listed.
1167
+ Otherwise, all feature processes are listed.
1168
+ Default Value: False
1169
+ Types: bool
1170
+
1171
+ RETURNS:
1172
+ teradataml DataFrame
1173
+
1174
+ RAISES:
1175
+ None
1176
+
1177
+ EXAMPLES:
1178
+ # Example 1: List all the feature processes in the repo 'vfs_v1'.
1179
+ >>> from teradataml import FeatureStore
1180
+
1181
+ # Create FeatureStore 'vfs_v1' or use existing one.
1182
+ >>> fs = FeatureStore("vfs_v1")
1183
+ FeatureStore is ready to use.
1184
+
1185
+ # Load the sales data.
1186
+ >>> load_example_data("dataframe", "sales")
1187
+ >>> df = DataFrame("sales")
1188
+
1189
+ # Create a feature process.
1190
+ >>> from teradataml import FeatureProcess
1191
+ >>> fp = FeatureProcess(repo="vfs_v1",
1192
+ ... data_domain='sales',
1193
+ ... object=df,
1194
+ ... entity="accounts",
1195
+ ... features=["Jan", "Feb", "Mar", "Apr"])
1196
+ >>> fp.run()
1197
+ Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' started.
1198
+ Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' completed.
1199
+
1200
+ # List all the feature processes in the repo 'vfs_v1'.
1201
+ >>> fs.list_feature_processes()
1202
+ description data_domain process_type data_source entity_id feature_names feature_ids valid_start valid_end
1203
+ process_id
1204
+ 5747082b-4acb-11f0-a2d7-f020ffe7fe09 sales denormalized view "sales" accounts Apr, Feb, Jan, Mar None 2025-06-16 16:02:55.260000+00: 9999-12-31 23:59:59.999999+00:
1205
+
1206
+ # Example 2: List all the archived feature processes in the repo 'vfs_v1'.
1207
+
1208
+ # Let's check the archived feature processes before archiving the feature process.
1209
+ >>> fs.list_feature_processes(archived=True)
1210
+ process_id start_time end_time status filter as_of_start as_of_end failure_reason
1211
+
1212
+ # Archive the feature process by passing the process_id.
1213
+ >>> fs.archive_feature_process('5747082b-4acb-11f0-a2d7-f020ffe7fe09')
1214
+ Feature 'Feb' is archived from table 'FS_T_6003dc24_375e_7fd6_46f0_eeb868305c4a'.
1215
+ Feature 'Feb' is archived from metadata.
1216
+ Feature 'Jan' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
1217
+ Feature 'Jan' is archived from metadata.
1218
+ Feature 'Mar' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
1219
+ Feature 'Mar' is archived from metadata.
1220
+ Feature 'Apr' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
1221
+ Feature 'Apr' is archived from metadata.
1222
+ FeatureProcess with process id '5747082b-4acb-11f0-a2d7-f020ffe7fe09' is archived.
1223
+ True
1224
+
1225
+ # List all the archived feature processes in the repo 'vfs_v1'.
1226
+ >>> fs.list_feature_processes(archived=True)
1227
+ description data_domain process_type data_source entity_id feature_names feature_ids valid_start valid_end
1228
+ process_id
1229
+ 5747082b-4acb-11f0-a2d7-f020ffe7fe09 sales denormalized view "sales" accounts Apr, Feb, Jan, Mar None 2025-06-16 16:02:55.260000+00: 2025-06-16 16:04:32.260000+00:
1230
+
1231
+ """
1232
+ self._logger.info(f"Listing feature processes from repository '{self.__repo}', data_domain '{self.__data_domain}', archived: {archived}")
1233
+
1234
+ validate_params = []
1235
+ validate_params.append(["archived", archived, True, bool, True])
1236
+ # Validate argument types
1237
+ _Validators._validate_function_arguments(validate_params)
1238
+
1239
+ f_process_df = self.__get_without_valid_period_df(self.__get_feature_process_df())
1240
+ f_process_df = f_process_df[f_process_df.data_domain == self.__data_domain]
1241
+
1242
+ if archived:
1243
+ # Filter out the active feature processes. Only archived feature processes are returned.
1244
+ f_process_df = f_process_df[(Col("valid_end") <= Col('current_timestamp'))]
1245
+ self._logger.debug("Filtered to show only archived feature processes")
1246
+
1247
+ self._logger.debug(f"Retrieved feature processes:\n{f_process_df}")
1248
+ return f_process_df
1249
+
1250
+ def list_feature_runs(self):
1251
+ """
1252
+ DESCRIPTION:
1253
+ Lists all the feature runs in the FeatureStore.
1254
+
1255
+ PARAMETERS:
1256
+ None
1257
+
1258
+ RETURNS:
1259
+ teradataml DataFrame
1260
+
1261
+ RAISES:
1262
+ None
1263
+
1264
+ EXAMPLES:
1265
+ # Example 1: List all the feature runs in the repo 'vfs_v1'.
1266
+ >>> from teradataml import FeatureStore
1267
+
1268
+ # Create a FeatureStore 'vfs_v1' or use existing one.
1269
+ >>> fs = FeatureStore("vfs_v1")
1270
+ FeatureStore is ready to use.
1271
+
1272
+ # Load the sales data.
1273
+ >>> load_example_data("dataframe", "sales")
1274
+ >>> df = DataFrame("sales")
1275
+
1276
+ # Create a feature process.
1277
+ >>> from teradataml import FeatureProcess
1278
+ >>> fp = FeatureProcess(repo="vfs_v1",
1279
+ ... data_domain='test_domain',
1280
+ ... object=df,
1281
+ ... entity='accounts',
1282
+ ... features=['Mar', 'Apr'])
1283
+ >>> fp.run(filters=[df.accounts=='Alpha Co', "accounts='Jones LLC'"])
1284
+ Process '11b62599-692f-11f0-ad19-f020ffe7fe09' started.
1285
+ Ingesting the features for filter 'accounts = 'Alpha Co'' to catalog.
1286
+ Ingesting the features for filter 'accounts='Jones LLC'' to catalog.
1287
+ Process '11b62599-692f-11f0-ad19-f020ffe7fe09' completed.
1288
+ True
1289
+
1290
+ # List all the feature runs in the repo 'vfs_v1'.
1291
+ >>> fs.list_feature_runs()
1292
+ process_id data_domain start_time end_time status filter as_of_start as_of_end failure_reason
1293
+ run_id
1294
+ 1 11b62599-692f-11f0-ad19-f020ffe7fe09 test_domain 2025-07-25 08:12:13.001968 2025-07-25 08:12:13.001968 completed accounts = 'Alpha Co', accounts='Jones LLC' None None None
1295
+ """
1296
+ self._logger.info(f"Listing feature runs from repository '{self.__repo}'")
1297
+ result = self.__get_feature_runs_df()
1298
+ self._logger.debug(f"Retrieved feature runs:\n{result}")
1299
+ return result
1300
+
1301
+ def list_dataset_catalogs(self) -> DataFrame:
1302
+ """
1303
+ DESCRIPTION:
1304
+ Lists all the dataset catalogs.
1305
+
1306
+ PARAMETERS:
1307
+ None
1308
+
1309
+ RETURNS:
1310
+ teradataml DataFrame
1311
+
1312
+ RAISES:
1313
+ None
1314
+
1315
+ EXAMPLES:
1316
+ # Example 1: List all the dataset catalogs in the repo 'vfs_v1'.
1317
+ >>> from teradataml import FeatureStore
1318
+
1319
+ # Create FeatureStore 'vfs_v1' or use existing one.
1320
+ >>> fs = FeatureStore("vfs_v1", data_domain='sales')
1321
+ FeatureStore is ready to use.
1322
+
1323
+ # Load the sales data.
1324
+ >>> load_example_data("dataframe", "sales")
1325
+ >>> df = DataFrame("sales")
1326
+
1327
+ # Create a feature process.
1328
+ >>> from teradataml import FeatureProcess
1329
+ >>> fp = FeatureProcess(repo="vfs_v1",
1330
+ ... data_domain='sales',
1331
+ ... object=df,
1332
+ ... entity="accounts",
1333
+ ... features=["Jan", "Feb", "Mar", "Apr"])
1334
+ >>> fp.run()
1335
+ Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' started.
1336
+ Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' completed.
1337
+
1338
+ # Create a dataset catalog.
1339
+ >>> from teradataml import DatasetCatalog
1340
+ >>> dc = DatasetCatalog(repo='vfs_v1', data_domain='sales')
1341
+ >>> dataset = dc.build_dataset(entity='accounts',
1342
+ ... selected_features = {
1343
+ ... 'Jan': '5747082b-4acb-11f0-a2d7-f020ffe7fe09',
1344
+ ... 'Feb': '5747082b-4acb-11f0-a2d7-f020ffe7fe09'},
1345
+ ... view_name='ds_jan_feb',
1346
+ ... description='Dataset with Jan and Feb features')
1347
+
1348
+ # List all the dataset catalogs in the repo 'vfs_v1'.
1349
+ >>> fs.list_dataset_catalogs()
1350
+ data_domain name entity_name database_name description valid_start valid_end
1351
+ id
1352
+ 4f763a7b-8920-448c-87af-432e7d36c9cb sales ds_jan_feb accounts vfs_v1 Dataset with Jan and Feb features 2025-06-16 16:15:17.577637+00: 9999-12-31 23:59:59.999999+00:
1353
+ """
1354
+ self._logger.info(f"Listing dataset catalogs from repository '{self.__repo}'")
1355
+ result = self.__get_without_valid_period_df(self.__get_dataset_catalog_df())
1356
+ self._logger.debug(f"Retrieved dataset catalogs:\n{result}")
1357
+ return result
1358
+
1359
+ def get_feature(self, name):
1360
+ """
1361
+ DESCRIPTION:
1362
+ Retrieve the feature.
1363
+
1364
+ PARAMETERS:
1365
+ name:
1366
+ Required Argument.
1367
+ Specifies the name of the feature to get.
1368
+ Types: str
1369
+
1370
+ RETURNS:
1371
+ Feature.
1372
+
1373
+ RAISES:
1374
+ TeradataMLException
1375
+
1376
+ EXAMPLES:
1377
+ >>> from teradataml import DataFrame, FeatureStore, load_example_data
1378
+ # Create DataFrame on sales data.
1379
+ >>> load_example_data("dataframe", "sales")
1380
+ >>> df = DataFrame("sales")
1381
+ >>> df
1382
+ Feb Jan Mar Apr datetime
1383
+ accounts
1384
+ Orange Inc 210.0 NaN NaN 250.0 04/01/2017
1385
+ Jones LLC 200.0 150.0 140.0 180.0 04/01/2017
1386
+ Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
1387
+ Alpha Co 210.0 200.0 215.0 250.0 04/01/2017
1388
+ Yellow Inc 90.0 NaN NaN NaN 04/01/2017
1389
+
1390
+ # Create a FeatureStore for repo 'vfs_v1'.
1391
+ >>> fs = FeatureStore("vfs_v1")
1392
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
1393
+ # Setup FeatureStore for this repository.
1394
+ >>> fs.setup()
1395
+ True
1396
+
1397
+ # Create Feature for column 'Mar' with name 'sales_mar'.
1398
+ >>> feature = Feature('sales_mar', column=df.Mar)
1399
+
1400
+ # Apply the Feature to FeatureStore.
1401
+ >>> fs.apply(feature)
1402
+ True
1403
+
1404
+ # Get the feature 'sales_mar' from repo 'vfs_v1'.
1405
+ >>> feature = fs.get_feature('sales_mar')
1406
+ >>> feature
1407
+ Feature(name=sales_mar)
1408
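+
+ # A hedged sketch (illustrative only, output not verified): requesting a
+ # feature that exists only in another data domain raises a
+ # TeradataMLException naming the domain(s) that do contain it.
+ >>> # fs.get_feature('sales_mar')  # from a store on a different data_domain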
+ """
1409
+ self._logger.info(f"Getting feature '{name}' from repository '{self.__repo}', data_domain '{self.__data_domain}'")
1410
+
1411
+ argument_validation_params = []
1412
+ argument_validation_params.append(["name", name, False, (str), True])
1413
+
1414
+ # Validate argument types
1415
+ _Validators._validate_function_arguments(argument_validation_params)
1416
+
1417
+ # Check if the feature exists in the current data domain.
1418
+ df = self.__get_features_wog_df()
1419
+ df = df[(df['name'] == name) &
1420
+ (df['data_domain'] == self.__data_domain)]
1421
+
1422
+ # If no records found, check if the feature exists in any domain.
1423
+ if df.shape[0] == 0:
1424
+ self._logger.debug(f"Feature '{name}' not found in current data domain '{self.__data_domain}', checking other domains")
1425
+ res = _FSUtils._get_data_domains(self.__repo, name, 'feature')
1426
+ if res:
1427
+ self._logger.debug(f"Feature '{name}' exists in other domains: {res}")
1428
+ msg_code = MessageCodes.EFS_OBJECT_IN_OTHER_DOMAIN
1429
+ error_msg = Messages.get_message(msg_code, "Feature", "name '{}'".format(name),
1430
+ self.__data_domain, res)
1431
+ else:
1432
+ self._logger.debug(f"Feature '{name}' does not exist in any domain")
1433
+ msg_code = MessageCodes.EFS_OBJECT_NOT_EXIST
1434
+ error_msg = Messages.get_message(msg_code, "Feature", "name '{}'".format(name),
1435
+ self.__data_domain)
1436
+ raise TeradataMlException(error_msg, msg_code)
1437
+
1438
+ self._logger.debug(f"Successfully retrieved feature '{name}' from data domain '{self.__data_domain}'")
1439
+ return Feature._from_df(df)
1440
+
1441
+ def get_group_features(self, group_name):
1442
+ """
1443
+ DESCRIPTION:
1444
+ Get the Features from the given feature group name.
1445
+
1446
+ PARAMETERS:
1447
+ group_name:
1448
+ Required Argument.
1449
+ Specifies the name of the group the features belong to.
1450
+ Types: str
1451
+
1452
+ RETURNS:
1453
+ List of Feature objects.
1454
+
1455
+ RAISES:
1456
+ TeradataMLException
1457
+
1458
+ EXAMPLES:
1459
+ >>> from teradataml import DataFrame, FeatureStore, load_example_data
1460
+
1461
+ # Create DataFrame on sales data.
1462
+ >>> load_example_data("dataframe", "sales")
1463
+ >>> df = DataFrame("sales")
1464
+ >>> df
1465
+ Feb Jan Mar Apr datetime
1466
+ accounts
1467
+ Orange Inc 210.0 NaN NaN 250.0 04/01/2017
1468
+ Jones LLC 200.0 150.0 140.0 180.0 04/01/2017
1469
+ Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
1470
+ Alpha Co 210.0 200.0 215.0 250.0 04/01/2017
1471
+ Yellow Inc 90.0 NaN NaN NaN 04/01/2017
1472
+
1473
+ # Create FeatureStore for repo 'vfs_v1'.
1474
+ >>> fs = FeatureStore("vfs_v1")
1475
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
1476
+ # Setup FeatureStore for this repository.
1477
+ >>> fs.setup()
1478
+ True
1479
+
1480
+ # Create FeatureGroup with name 'sales' from DataFrame.
1481
+ >>> fg = FeatureGroup.from_DataFrame(
1482
+ ... name="sales", df=df, entity_columns="accounts", timestamp_column="datetime")
1483
+ # Apply the FeatureGroup to FeatureStore.
1484
+ >>> fs.apply(fg)
1485
+ True
1486
+
1487
+ # Get all the features belonging to the group 'sales' from repo 'vfs_v1'.
1488
+ >>> features = fs.get_group_features('sales')
1489
+ >>> features
1490
+ [Feature(name=Jan), Feature(name=Feb), Feature(name=Apr), Feature(name=Mar)]
1491
+ >>>
1492
+ """
1493
+ argument_validation_params = []
1494
+ argument_validation_params.append(["group_name", group_name, False, (str), True])
1495
+
1496
+ # Validate argument types
1497
+ _Validators._validate_function_arguments(argument_validation_params)
1498
+
1499
+ self._logger.info(f"Getting features for group '{group_name}' from repository '{self.__repo}', data_domain '{self.__data_domain}'")
1500
+
1501
+ # Select active features.
1502
+ features_df = self.__get_features_df()
1503
+ features_df = features_df[((features_df.status != FeatureStatus.INACTIVE.name) &
1504
+ (features_df.group_name == group_name) &
1505
+ (features_df.data_domain == self.__data_domain))]
1506
+
1507
+ # Check if a feature with that group name exists or not. If not, raise error.
1508
+ if features_df.shape[0] == 0:
1509
+ self._logger.debug(f"No features found for group '{group_name}' in current data domain '{self.__data_domain}', checking other domains")
1510
+ res = _FSUtils._get_data_domains(self.__repo, group_name, 'group_features')
1511
+ if res:
1512
+ self._logger.debug(f"Features for group '{group_name}' exist in other domains: {res}")
1513
+ msg_code = MessageCodes.EFS_OBJECT_IN_OTHER_DOMAIN
1514
+ error_msg = Messages.get_message(msg_code, "Features", "group name '{}'".format(group_name),
1515
+ self.__data_domain, res)
1516
+ else:
1517
+ self._logger.debug(f"No features found for group '{group_name}' in any domain")
1518
+ msg_code = MessageCodes.EFS_OBJECT_NOT_EXIST
1519
+ error_msg = Messages.get_message(msg_code, "Features", "group name '{}'".format(group_name),
1520
+ self.__data_domain)
1521
+ raise TeradataMlException(error_msg, msg_code)
1522
+
1523
+ self._logger.debug(f"Successfully retrieved features for group '{group_name}':\n{features_df}")
1524
+ return Feature._from_df(features_df)
1525
+
1526
+ def get_feature_group(self, name):
1527
+ """
1528
+ DESCRIPTION:
1529
+ Retrieve the FeatureGroup using its name.
1530
+
1531
+ PARAMETERS:
1532
+ name:
1533
+ Required Argument.
1534
+ Specifies the name of the feature group to be retrieved.
1535
+ Types: str
1536
+
1537
+ RETURNS:
1538
+ Object of FeatureGroup
1539
+
1540
+ RAISES:
1541
+ TeradataMLException
1542
+
1543
+ EXAMPLES:
1544
+ >>> from teradataml import DataFrame, FeatureStore, load_example_data
1545
+ # Create DataFrame on sales data.
1546
+ >>> load_example_data("dataframe", "sales")
1547
+ >>> df = DataFrame("sales")
1548
+ >>> df
1549
+ Feb Jan Mar Apr datetime
1550
+ accounts
1551
+ Orange Inc 210.0 NaN NaN 250.0 04/01/2017
1552
+ Jones LLC 200.0 150.0 140.0 180.0 04/01/2017
1553
+ Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
1554
+ Alpha Co 210.0 200.0 215.0 250.0 04/01/2017
1555
+ Yellow Inc 90.0 NaN NaN NaN 04/01/2017
1556
+
1557
+ # Create FeatureStore for repo 'vfs_v1'.
1558
+ >>> fs = FeatureStore("vfs_v1")
1559
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
1560
+ # Setup FeatureStore for this repository.
1561
+ >>> fs.setup()
1562
+ True
1563
+
1564
+ # Create FeatureGroup with name 'sales' from DataFrame.
1565
+ >>> fg = FeatureGroup.from_DataFrame(
1566
+ ... name="sales", df=df, entity_columns="accounts", timestamp_column="datetime")
1567
+ # Apply the FeatureGroup to FeatureStore.
1568
+ >>> fs.apply(fg)
1569
+ True
1570
+
1571
+ # Get FeatureGroup with group name 'sales' from repo 'vfs_v1'.
1572
+ >>> fg = fs.get_feature_group('sales')
1573
+ >>> fg
1574
+ FeatureGroup(sales, features=[Feature(name=Jan), Feature(name=Feb), Feature(name=Apr), Feature(name=Mar)], entity=Entity(name=sales), data_source=DataSource(name=sales))
1575
+ >>>
1576
+ """
1577
+ self._logger.info(f"Getting feature group '{name}' from repository '{self.__repo}', data_domain '{self.__data_domain}'")
1578
+
1579
+ argument_validation_params = []
1580
+ argument_validation_params.append(["name", name, False, (str), True])
1581
+
1582
+ # Validate argument types
1583
+ _Validators._validate_function_arguments(argument_validation_params)
1584
+
1585
+ df = self.list_feature_groups()
1586
+ df = df[(df['name'] == name) &
1587
+ (df['data_domain'] == self.__data_domain)]
1588
+
1589
+ # Check if a feature group with that name exists or not. If not, raise error.
1590
+ if df.shape[0] == 0:
1591
+ self._logger.debug(f"Feature group '{name}' not found in current data domain '{self.__data_domain}', checking other domains")
1592
+ res = _FSUtils._get_data_domains(self.__repo, name, 'feature_group')
1593
+ if res:
1594
+ self._logger.debug(f"Feature group '{name}' exists in other domains: {res}")
1595
+ msg_code = MessageCodes.EFS_OBJECT_IN_OTHER_DOMAIN
1596
+ error_msg = Messages.get_message(msg_code, "FeatureGroup", "name '{}'".format(name),
1597
+ self.__data_domain, res)
1598
+ else:
1599
+ self._logger.debug(f"Feature group '{name}' does not exist in any domain")
1600
+ msg_code = MessageCodes.EFS_OBJECT_NOT_EXIST
1601
+ error_msg = Messages.get_message(msg_code, "FeatureGroup", "name '{}'".format(name),
1602
+ self.__data_domain)
1603
+ raise TeradataMlException(error_msg, msg_code)
1604
+
1605
+ self._logger.debug(f"Successfully retrieved feature group '{name}' from data domain '{self.__data_domain}'")
1606
+ return FeatureGroup._from_df(df,
1607
+ self.__repo,
1608
+ self.__get_features_df(),
1609
+ self.__get_entity_df(),
1610
+ self.__get_data_source_df(),
1611
+ data_domain=self.__data_domain
1612
+ )
1613
+
1614
+ def get_entity(self, name):
1615
+ """
1616
+ DESCRIPTION:
1617
+ Get the entity from the feature store.
1618
+
1619
+ PARAMETERS:
1620
+ name:
1621
+ Required Argument.
1622
+ Specifies the name of the entity.
1623
+ Types: str
1624
+
1625
+ RETURNS:
1626
+ Object of Entity.
1627
+
1628
+ RAISES:
1629
+ None
1630
+
1631
+ EXAMPLES:
1632
+ >>> from teradataml import DataFrame, Entity, FeatureStore, load_example_data
1633
+ # Create DataFrame on admissions data.
1634
+ >>> load_example_data("dataframe", "admissions_train")
1635
+ >>> df = DataFrame("admissions_train")
1636
+ >>> df
1637
+ masters gpa stats programming admitted
1638
+ id
1639
+ 34 yes 3.85 Advanced Beginner 0
1640
+ 32 yes 3.46 Advanced Beginner 0
1641
+ 11 no 3.13 Advanced Advanced 1
1642
+ 40 yes 3.95 Novice Beginner 0
1643
+ 38 yes 2.65 Advanced Beginner 1
1644
+ 36 no 3.00 Advanced Novice 0
1645
+ 7 yes 2.33 Novice Novice 1
1646
+ 26 yes 3.57 Advanced Advanced 1
1647
+ 19 yes 1.98 Advanced Advanced 0
1648
+ 13 no 4.00 Advanced Novice 1
1649
+
1650
+ # Create FeatureStore for repo 'vfs_v1'.
1651
+ >>> fs = FeatureStore("vfs_v1")
1652
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
1653
+ # Setup FeatureStore for this repository.
1654
+ >>> fs.setup()
1655
+ True
1656
+
1657
+ # Create Entity for column 'id' with name 'admissions_id'.
1658
+ >>> entity = Entity(name='admissions_id', description="Entity for admissions", columns=df.id)
1659
+ # Apply the Entity to FeatureStore 'vfs_v1'.
1660
+ >>> fs.apply(entity)
1661
+ True
1662
+
1663
+ # Get the Entity 'admissions_id' from repo 'vfs_v1'.
1664
+ >>> entity = fs.get_entity('admissions_id')
1665
+ >>> entity
1666
+ Entity(name=admissions_id)
1667
+ """
1668
+ self._logger.info(f"Getting entity '{name}' from repository '{self.__repo}', data_domain '{self.__data_domain}'")
1669
+
1670
+ argument_validation_params = []
1671
+ argument_validation_params.append(["name", name, False, (str), True])
1672
+
1673
+ # Validate argument types
1674
+ _Validators._validate_function_arguments(argument_validation_params)
1675
+
1676
+ df = self.__get_entity_df()
1677
+ df = df[(df['name'] == name) &
1678
+ (df['data_domain'] == self.__data_domain)]
1679
+
1680
+ # Check if entity with that name exists or not. If not, raise error.
1681
+ if df.shape[0] == 0:
1682
+ res = _FSUtils._get_data_domains(self.__repo, name, 'entity')
1683
+ if res:
1684
+ msg_code = MessageCodes.EFS_OBJECT_IN_OTHER_DOMAIN
1685
+ error_msg = Messages.get_message(msg_code, "Entity", "name '{}'".format(name),
1686
+ self.__data_domain, res)
1687
+ else:
1688
+ msg_code = MessageCodes.EFS_OBJECT_NOT_EXIST
1689
+ error_msg = Messages.get_message(msg_code, "Entity", "name '{}'".format(name),
1690
+ self.__data_domain)
1691
+ raise TeradataMlException(error_msg, msg_code)
1692
+
1693
+ return Entity._from_df(df)
1694
+
1695
+ def get_data_source(self, name):
1696
+ """
1697
+ DESCRIPTION:
1698
+ Get the data source from the feature store.
1699
+
1700
+ PARAMETERS:
1701
+ name:
1702
+ Required Argument.
1703
+ Specifies the name of the data source.
1704
+ Types: str
1705
+
1706
+ RETURNS:
1707
+ Object of DataSource.
1708
+
1709
+ RAISES:
1710
+ TeradataMLException
1711
+
1712
+ EXAMPLES:
1713
+ >>> from teradataml import DataFrame, DataSource, FeatureStore, load_example_data
1714
+ # Create DataFrame on admissions data.
1715
+ >>> load_example_data("dataframe", "admissions_train")
1716
+ >>> df = DataFrame("admissions_train")
1717
+ >>> df
1718
+ masters gpa stats programming admitted
1719
+ id
1720
+ 34 yes 3.85 Advanced Beginner 0
1721
+ 32 yes 3.46 Advanced Beginner 0
1722
+ 11 no 3.13 Advanced Advanced 1
1723
+ 40 yes 3.95 Novice Beginner 0
1724
+ 38 yes 2.65 Advanced Beginner 1
1725
+ 36 no 3.00 Advanced Novice 0
1726
+ 7 yes 2.33 Novice Novice 1
1727
+ 26 yes 3.57 Advanced Advanced 1
1728
+ 19 yes 1.98 Advanced Advanced 0
1729
+ 13 no 4.00 Advanced Novice 1
1730
+
1731
+ # Create FeatureStore for repo 'vfs_v1'.
1732
+ >>> fs = FeatureStore("vfs_v1")
1733
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
1734
+ # Setup FeatureStore for this repository.
1735
+ >>> fs.setup()
1736
+ True
1737
+
1738
+ # Create DataSource using DataFrame 'df' with name 'admissions'.
1739
+ >>> ds = DataSource('admissions', source=df)
1740
+ # Apply the DataSource to FeatureStore 'vfs_v1'.
1741
+ >>> fs.apply(ds)
1742
+ True
1743
+
1744
+ # Get the DataSource 'admissions' from repo 'vfs_v1'.
1745
+ >>> ds = fs.get_data_source('admissions')
1746
+ >>> ds
1747
+ DataSource(name=admissions)
1748
+ """
1749
+ self._logger.info(f"Getting data source '{name}' from repository '{self.__repo}', data_domain '{self.__data_domain}'")
1750
+
1751
+ argument_validation_params = []
1752
+ argument_validation_params.append(["name", name, False, (str), True])
1753
+
1754
+ # Validate argument types
1755
+ _Validators._validate_function_arguments(argument_validation_params)
1756
+
1757
+ df = self.__get_data_source_df()
1758
+ df = df[(df['name'] == name) &
1759
+ (df['data_domain'] == self.__data_domain)]
1760
+
1761
+ # Check if a data source with that name exists or not. If not, raise error.
1762
+ if df.shape[0] == 0:
1763
+ res = _FSUtils._get_data_domains(self.__repo, name, 'data_source')
1764
+ if res:
1765
+ msg_code = MessageCodes.EFS_OBJECT_IN_OTHER_DOMAIN
1766
+ error_msg = Messages.get_message(msg_code, "DataSource", "name '{}'".format(name),
1767
+ self.__data_domain, res)
1768
+ else:
1769
+ msg_code = MessageCodes.EFS_OBJECT_NOT_EXIST
1770
+ error_msg = Messages.get_message(msg_code, "DataSource", "name '{}'".format(name),
1771
+ self.__data_domain)
1772
+ raise TeradataMlException(error_msg, msg_code)
1773
+
1774
+ return DataSource._from_df(df)
1775
+
1776
+ def get_feature_process(self, object, entity=None, features=None, description=None):
1777
+ """
1778
+ DESCRIPTION:
1779
+ Retrieves the FeatureProcess object.
1780
+
1781
+ PARAMETERS:
1782
+ object:
1783
+ Required Argument.
1784
+ Specifies the source to ingest feature values. It can be one of the following:
1785
+ * teradataml DataFrame
1786
+ * Feature group
1787
+ * Process id
1788
+ Notes:
1789
+ * If "object" is of type teradataml DataFrame, then "entity"
1790
+ and "features" should be provided.
1791
+ * If "object" is of type str, then it is considered as
1792
+ as process id of an existing FeatureProcess and reruns the
1793
+ process. Entity and features are taken from the existing
1794
+ feature process. Hence, the arguments "entity" and "features"
1795
+ are ignored.
1796
+ * If "object" is of type FeatureGroup, then entity and features
1797
+ are taken from the FeatureGroup. Hence, the arguments "entity"
1798
+ and "features" are ignored.
1799
+ Types: DataFrame or FeatureGroup or str
1800
+
1801
+ entity:
1802
+ Optional Argument.
1803
+ Specifies the Entity for the DataFrame.
1804
+ Notes:
1805
+ * Ignored when "object" is of type FeatureGroup or str.
1806
+ * If a string or list of strings is provided, then "object" should
1807
+ have these columns in it.
1808
+ * If Entity object is provided, then associated columns in Entity
1809
+ object should be present in DataFrame.
1810
+ Types: Entity or str or list of str
1811
+
1812
+ features:
1813
+ Optional Argument.
1814
+ Specifies the list of features to be considered in the feature process. Feature
1815
+ ingestion takes place only for these features.
1816
+ Note:
1817
+ * Ignored when "object" is of type FeatureGroup or str.
1818
+ Types: Feature or list of Feature or str or list of str.
1819
+
1820
+ description:
1821
+ Optional Argument.
1822
+ Specifies the description for the FeatureProcess.
1823
+ Types: str
1824
+
1825
+ RETURNS:
1826
+ FeatureProcess
1827
+
1828
+ RAISES:
1829
+ None.
1830
+
1831
+ EXAMPLES:
1832
+ >>> from teradataml import FeatureStore
1833
+ >>> fs = FeatureStore('vfs_v1')
1834
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
1835
+ # Setup FeatureStore for this repository.
1836
+ >>> fs.setup()
1837
+ True
1838
+
1839
+ # Load the admissions data to Vantage.
1840
+ >>> from teradataml import DataFrame, load_example_data
1841
+ >>> load_example_data("dataframe", "admissions_train")
1842
+ >>> admission_df = DataFrame("admissions_train")
1843
+
1844
+ >>> fp = FeatureProcess(repo='vfs_v1',
1845
+ ... data_domain='d1',
1846
+ ... object=admission_df,
1847
+ ... entity='id',
1848
+ ... features=['stats', 'programming', 'admitted'])
1849
+ >>> fp.run()
1850
+ Process '0d365f08-66b0-11f0-88ff-b0dcef8381ea' started.
1851
+ Process '0d365f08-66b0-11f0-88ff-b0dcef8381ea' completed.
1852
+
1853
+ >>> fs.get_feature_process(object='0d365f08-66b0-11f0-88ff-b0dcef8381ea')
1854
+ FeatureProcess(repo=vfs_v1, data_domain=d1, process_id=0d365f08-66b0-11f0-88ff-b0dcef8381ea)
1855
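+
+ # A hedged sketch (illustrative only): "object" may also be a teradataml
+ # DataFrame, in which case "entity" and "features" must be provided, as
+ # in the FeatureProcess example above.
+ >>> # fs.get_feature_process(object=admission_df,
+ ... #                        entity='id',
+ ... #                        features=['stats', 'admitted'])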
+ """
1856
+ self._logger.info(f"Getting FeatureProcess from repository '{self.__repo}', data_domain '{self.__data_domain}', object: {object}, entity: {entity}, features: {features}")
1857
+
1858
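+ # Construct the FeatureProcess in this store's repo and data domain; how
+ # "object" is interpreted (DataFrame, FeatureGroup, or process id) is
+ # handled by FeatureProcess itself.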
+ return FeatureProcess(repo=self.__repo,
1859
+ data_domain=self.__data_domain,
1860
+ object=object,
1861
+ entity=entity,
1862
+ features=features,
1863
+ description=description
1864
+ )
1865
+
1866
+ def get_feature_catalog(self):
1867
+ """
1868
+ DESCRIPTION:
1869
+ Retrieves FeatureCatalog based on the feature store's repo and data domain.
1870
+
1871
+ PARAMETERS:
1872
+ None.
1873
+
1874
+ RETURNS:
1875
+ FeatureCatalog
1876
+
1877
+ RAISES:
1878
+ None.
1879
+
1880
+ EXAMPLES:
1881
+ >>> from teradataml import FeatureStore
1882
+ # Create FeatureStore for repo 'vfs_v1'.
1883
+ >>> fs = FeatureStore('vfs_v1')
1884
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
1885
+ # Setup FeatureStore for this repository.
1886
+ >>> fs.setup()
1887
+ True
1888
+
1889
+ # Load the sales data to Vantage.
1890
+ >>> from teradataml import load_example_data
1891
+ >>> load_example_data("dataframe", "sales")
1892
+ >>> df = DataFrame("sales")
1893
+
1894
+ # Create a feature process.
1895
+ >>> from teradataml import FeatureProcess
1896
+ >>> fp = FeatureProcess(repo="vfs_v1",
1897
+ ... data_domain='sales',
1898
+ ... object=df,
1899
+ ... entity="accounts",
1900
+ ... features=["Jan", "Feb", "Mar", "Apr"])
1901
+ >>> fp.run()
1902
+ Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' started.
1903
+ Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' completed.
1904
+
1905
+ # Get FeatureCatalog from FeatureStore.
1906
+ >>> fs.get_feature_catalog()
1907
+ FeatureCatalog(repo=vfs_v1, data_domain=sales)
1908
+ """
1909
+ self._logger.info(f"Getting FeatureCatalog for repository '{self.__repo}', data_domain '{self.__data_domain}'")
1910
+ result = FeatureCatalog(repo=self.__repo, data_domain=self.__data_domain)
1911
+ self._logger.debug(f"Created FeatureCatalog object: {result}")
1912
+ return result
1913
+
1914
+ def get_data_domain(self):
1915
+ """
1916
+ DESCRIPTION:
1917
+ Retrieves DataDomain based on the feature store's repo and data domain.
1918
+
1919
+ PARAMETERS:
1920
+ None
1921
+
1922
+ RETURNS:
1923
+ DataDomain
1924
+
1925
+ RAISES:
1926
+ None.
1927
+
1928
+ EXAMPLES:
1929
+ >>> from teradataml import FeatureStore
1930
+ # Create FeatureStore for repo 'vfs_v1'.
1931
+ >>> fs = FeatureStore('vfs_v1', data_domain='sales')
1932
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
1933
+ # Setup FeatureStore for this repository.
1934
+ >>> fs.setup()
+ True
1935
+
1936
+ # Get DataDomain from FeatureStore.
1937
+ >>> fs.get_data_domain()
1938
+ DataDomain(repo=vfs_v1, data_domain=sales)
1939
+ """
1940
+ self._logger.info(f"Getting DataDomain for repository '{self.__repo}', data_domain '{self.__data_domain}'")
1941
+ result = DataDomain(repo=self.__repo, data_domain=self.__data_domain)
1942
+ self._logger.debug(f"Created DataDomain object: {result}")
1943
+ return result
1944
+
1945
+ def get_dataset_catalog(self):
1946
+ """
1947
+ DESCRIPTION:
1948
+ Retrieves DatasetCatalog based on the feature store's repo and data domain.
1949
+
1950
+ PARAMETERS:
1951
+ None.
1952
+
1953
+ RETURNS:
1954
+ DatasetCatalog
1955
+
1956
+ RAISES:
1957
+ None.
1958
+
1959
+ EXAMPLES:
1960
+ >>> from teradataml import FeatureStore
1961
+ # Create FeatureStore for repo 'vfs_v1'.
1962
+ >>> fs = FeatureStore('vfs_v1', data_domain='sales')
1963
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
1964
+ # Setup FeatureStore for this repository.
1965
+ >>> fs.setup()
+ True
1966
+
1967
+ # Load the sales data to Vantage.
1968
+ >>> from teradataml import load_example_data
1969
+ >>> load_example_data("dataframe", "sales")
1970
+ >>> df = DataFrame("sales")
1971
+
1972
+ # Create a feature process.
1973
+ >>> from teradataml import FeatureProcess
1974
+ >>> fp = FeatureProcess(repo="vfs_v1",
1975
+ ... data_domain='sales',
1976
+ ... object=df,
1977
+ ... entity="accounts",
1978
+ ... features=["Jan", "Feb", "Mar", "Apr"])
1979
+ >>> fp.run()
1980
+ Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' started.
1981
+ Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' completed.
1982
+ True
1983
+
1984
+ # Build the dataset.
1985
+ >>> dc = DatasetCatalog(repo='vfs_v1', data_domain='sales')
1986
+ >>> dataset = dc.build_dataset(entity='accounts',
1987
+ ... selected_features = {
1988
+ ... 'Jan': fp.process_id,
1989
+ ... 'Feb': fp.process_id},
1990
+ ... view_name='ds_jan_feb',
1991
+ ... description='Dataset with Jan and Feb features')
1992
+
1993
+ # Get DatasetCatalog from FeatureStore.
1994
+ >>> fs.get_dataset_catalog()
1995
+ DatasetCatalog(repo=vfs_v1, data_domain=sales)
1996
+ """
1997
+ self._logger.info(f"Getting DatasetCatalog for repository '{self.__repo}', data_domain '{self.__data_domain}'")
1998
+ result = DatasetCatalog(repo=self.__repo, data_domain=self.__data_domain)
1999
+ self._logger.debug(f"Created DatasetCatalog object: {result}")
2000
+ return result
2001
+
2002
+ def set_features_inactive(self, names):
2003
+ """
2004
+ DESCRIPTION:
2005
+ Mark the feature status as 'inactive'. Note that inactive features are
2006
+ not available for any further processing. Set the status back to 'active'
2007
+ with the "set_features_active()" method.
2008
+
2009
+ PARAMETERS:
2010
+ names:
2011
+ Required Argument.
2012
+ Specifies the name(s) of the feature(s).
2013
+ Types: str OR list of str
2014
+
2015
+ RETURNS:
2016
+ bool
2017
+
2018
+ RAISES:
2019
+ TeradataMLException
2020
+
2021
+ EXAMPLES:
2022
+ >>> from teradataml import DataFrame, DataSource, FeatureStore, load_example_data
2023
+ # Create DataFrame on admissions data.
2024
+ >>> load_example_data("dataframe", "admissions_train")
2025
+ >>> df = DataFrame("admissions_train")
2026
+ >>> df
2027
+ masters gpa stats programming admitted
2028
+ id
2029
+ 34 yes 3.85 Advanced Beginner 0
2030
+ 32 yes 3.46 Advanced Beginner 0
2031
+ 11 no 3.13 Advanced Advanced 1
2032
+ 40 yes 3.95 Novice Beginner 0
2033
+ 38 yes 2.65 Advanced Beginner 1
2034
+ 36 no 3.00 Advanced Novice 0
2035
+ 7 yes 2.33 Novice Novice 1
2036
+ 26 yes 3.57 Advanced Advanced 1
2037
+ 19 yes 1.98 Advanced Advanced 0
2038
+ 13 no 4.00 Advanced Novice 1
2039
+
2040
+ # Create FeatureStore for repo 'vfs_v1'.
2041
+ >>> fs = FeatureStore("vfs_v1")
2042
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
2043
+ # Setup FeatureStore for this repository.
2044
+ >>> fs.setup()
2045
+ True
2046
+
2047
+ # Create FeatureGroup from DataFrame df.
2048
+ >>> fg = FeatureGroup.from_DataFrame(name='admissions', df=df, entity_columns='id')
2049
+ # Apply the FeatureGroup to FeatureStore 'vfs_v1'.
2050
+ >>> fs.apply(fg)
2051
+ True
2052
+
2053
+ # Get FeatureGroup 'admissions' from FeatureStore.
2054
+ >>> fg = fs.get_feature_group('admissions')
2055
+ >>> fg
2056
+ FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=programming), Feature(name=admitted), Feature(name=stats), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
2057
+
2058
+ # Example 1: Set the Feature 'programming' inactive.
2059
+ >>> fs.set_features_inactive('programming')
2061
+ True
2062
+
2063
+ # Get FeatureGroup again after setting feature inactive.
2064
+ >>> fg = fs.get_feature_group('admissions')
2065
+ >>> fg
2066
+ FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=stats), Feature(name=admitted), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
2067
+
2068
+ """
2069
+ self._logger.info(f"Setting features inactive: {names} in repository '{self.__repo}', data_domain '{self.__data_domain}'")
2070
+ return self.__set_active_inactive_features(names, active=False)
2071
+
2072
+ def set_features_active(self, names):
2073
+ """
2074
+ DESCRIPTION:
2075
+ Mark the feature status as 'active'. Set the status back to 'inactive'
2075
+ with the "set_features_inactive()" method. Note that inactive features
2076
+ are not available for any further processing.
2078
+
2079
+ PARAMETERS:
2080
+ names:
2081
+ Required Argument.
2082
+ Specifies the name(s) of the feature(s).
2083
+ Types: str OR list of str
2084
+
2085
+ RETURNS:
2086
+ bool
2087
+
2088
+ RAISES:
2089
+ TeradataMLException
2090
+
2091
+ EXAMPLES:
2092
+ >>> from teradataml import DataFrame, DataSource, FeatureStore, load_example_data
2093
+ # Create DataFrame on admissions data.
2094
+ >>> load_example_data("dataframe", "admissions_train")
2095
+ >>> df = DataFrame("admissions_train")
2096
+ >>> df
2097
+ masters gpa stats programming admitted
2098
+ id
2099
+ 34 yes 3.85 Advanced Beginner 0
2100
+ 32 yes 3.46 Advanced Beginner 0
2101
+ 11 no 3.13 Advanced Advanced 1
2102
+ 40 yes 3.95 Novice Beginner 0
2103
+ 38 yes 2.65 Advanced Beginner 1
2104
+ 36 no 3.00 Advanced Novice 0
2105
+ 7 yes 2.33 Novice Novice 1
2106
+ 26 yes 3.57 Advanced Advanced 1
2107
+ 19 yes 1.98 Advanced Advanced 0
2108
+ 13 no 4.00 Advanced Novice 1
2109
+
2110
+ # Create FeatureStore for repo 'vfs_v1'.
2111
+ >>> fs = FeatureStore("vfs_v1")
2112
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
2113
+ # Setup FeatureStore for this repository.
2114
+ >>> fs.setup()
2115
+ True
2116
+
2117
+ # Create FeatureGroup from DataFrame df.
2118
+ >>> fg = FeatureGroup.from_DataFrame(name='admissions', df=df, entity_columns='id')
2119
+ # Apply the FeatureGroup to FeatureStore 'vfs_v1'.
2120
+ >>> fs.apply(fg)
2121
+ True
2122
+
2123
+ # Get FeatureGroup 'admissions' from FeatureStore.
2124
+ >>> fg = fs.get_feature_group('admissions')
2125
+ >>> fg
2126
+ FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=programming), Feature(name=admitted), Feature(name=stats), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
2127
+
2128
+ # Example 1: Set the Feature 'programming' inactive, then mark it active again.
2129
+ >>> fs.set_features_inactive('programming')
2131
+ True
2132
+
2133
+ # Get FeatureGroup again after setting feature inactive.
2134
+ >>> fg = fs.get_feature_group('admissions')
2135
+ >>> fg
2136
+ FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=stats), Feature(name=admitted), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
2137
+
2138
+ # Mark Feature 'programming' from 'inactive' to 'active'.
2139
+ >>> fs.set_features_active('programming')
2140
+ # Get FeatureGroup again after setting feature active.
2141
+ >>> fg = fs.get_feature_group('admissions')
2142
+ >>> fg
2143
+ FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=programming), Feature(name=admitted), Feature(name=stats), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
2144
+ >>>
2145
+ """
2146
+ self._logger.info(f"Setting features active: {names} in repository '{self.__repo}', data_domain '{self.__data_domain}'")
2147
+ return self.__set_active_inactive_features(names, active=True)
2148
+
2149
+ def __set_active_inactive_features(self, names, active):
2150
+ """
2151
+ DESCRIPTION:
2152
+ Internal function to mark features as either active or inactive.
2153
+
2154
+ PARAMETERS:
2155
+ names:
2156
+ Required Argument.
2157
+ Specifies the name(s) of the feature(s).
2158
+ Types: str OR list of str
2159
+
2160
+ RETURNS:
2161
+ bool
2162
+
2163
+ RAISES:
2164
+ TeradataMLException
2165
+
2166
+ EXAMPLES:
2167
+ # Example 1: Set the feature 'feature1' inactive in the repo
2168
+ # 'vfs_v1'.
2169
+ >>> from teradataml import FeatureStore
2170
+ >>> fs = FeatureStore('vfs_v1')
2171
+ >>> fs.__set_active_inactive_features(names='feature1', active=False)
2172
+ True
2173
+ >>>
2174
+ """
2175
+ names = UtilFuncs._as_list(names)
2176
+
2177
+ argument_validation_params = []
2178
+ argument_validation_params.append(["names", names, False, (str, list), True])
2179
+
2180
+ # Validate argument types
2181
+ _Validators._validate_function_arguments(argument_validation_params)
2182
+
2183
+ status = FeatureStatus.ACTIVE.name if active else FeatureStatus.INACTIVE.name
2184
+
2185
+ is_set = True
2186
+ if status == FeatureStatus.INACTIVE.name:
2187
+ # Get the joined df of '_efs_features' and '_efs_features_metadata'.
2188
+ feature_info_df = self.__get_feature_info_df()
2189
+ metadata_features = [feature.name for feature in feature_info_df.itertuples()]
2190
+
2191
+ # Split the user-provided feature names into those present in the
2192
+ # catalog and those not present in it.
2193
+ catalog_features = []
2194
+ non_catalog_features = []
2195
+ for name in names:
2196
+ if name in metadata_features:
2197
+ catalog_features.append(name)
2198
+ else:
2199
+ non_catalog_features.append(name)
2200
+
2201
+ # If all user-provided names are present in the catalog.
2202
+ if len(catalog_features) == len(names):
2203
+ print("Feature(s) '{}' entries exists in feature catalog, cannot be set "
2204
+ "to inactive.".format(", ".join(catalog_features)))
2205
+ return False
2206
+ # If some of the user-provided features are present in the catalog.
2207
+ elif len(catalog_features) > 0:
2208
+ print("Feature(s) '{}' entries exists in feature catalog, cannot be set "
2209
+ "to inactive.".format(", ".join(catalog_features)))
2210
+ is_set = False
2211
+
2212
+ # Keep only the feature names that are not present in the catalog.
2213
+ names = non_catalog_features
2214
+
2215
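+ # Persist the new status for the remaining feature names; only the
+ # 'status' column is updated, with rows matched on feature name.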
+ _update_data(table_name=self.__table_names['feature'],
2216
+ schema_name=self.__repo,
2217
+ update_columns_values={"status": status},
2218
+ update_conditions={"name": names}
2219
+ )
2220
+
2221
+ return is_set
2222
+
2223
+ def apply(self, object):
2224
+ """
2225
+ DESCRIPTION:
2226
+ Register objects to the repository.
2227
+ Note:
2228
+ * If the object is an Entity or FeatureGroup and the same entity or feature group is already
2229
+ registered in the repository, it is not updated.
2230
+ * If the entity or feature group is associated with any feature process, an error is raised
2231
+ while modifying these objects.
2232
+
2233
+ PARAMETERS:
2234
+ object:
2235
+ Required Argument.
2236
+ Specifies the object to register with the repository.
2237
+ Types: Feature OR DataSource OR Entity OR FeatureGroup.
2238
+
2239
+ RETURNS:
2240
+ bool.
2241
+
2242
+ RAISES:
2243
+ TeradataMLException
2244
+
2245
+ EXAMPLES:
2246
+ >>> from teradataml import FeatureStore, DataFrame, load_example_data
2247
+ # Create DataFrame on sales data.
2248
+ >>> load_example_data('dataframe', ['sales'])
2249
+ >>> df = DataFrame("sales")
2250
+
2251
+ # Create FeatureStore for repo 'vfs_v1'.
2252
+ >>> fs = FeatureStore("vfs_v1")
2253
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
2254
+ # Setup FeatureStore for this repository.
2255
+ >>> fs.setup()
2256
+ True
2257
+
2258
+ # Example 1: create a Feature for column 'Feb' from 'sales' DataFrame
2259
+ # and register with repo 'vfs_v1'.
2260
+ >>> # Create Feature.
2261
+ >>> from teradataml import Feature
2262
+ >>> feature = Feature('sales:Feb', df.Feb)
2263
+ >>> # Register the above Feature with repo.
2264
+ >>> fs.apply(feature)
2265
+ True
2266
+ >>>
2267
+
2268
+ # Example 2: create Entity for 'sales' DataFrame and register
2269
+ # with repo 'vfs_v1'.
2270
+ >>> # Create Entity.
2271
+ >>> from teradataml import Entity
2272
+ >>> entity = Entity('sales:accounts', df.accounts)
2273
+ >>> # Register the above Entity with repo.
2274
+ >>> fs.apply(entity)
2275
+ True
2276
+ >>>
2277
+
2278
+ # Example 3: create DataSource for 'sales' DataFrame and register
2279
+ # with repo 'vfs_v1'.
2280
+ >>> # Create DataSource.
2281
+ >>> from teradataml import DataSource
2282
+ >>> ds = DataSource('Sales_Data', df)
2283
+ >>> # Register the above DataSource with repo.
2284
+ >>> fs.apply(ds)
2285
+ True
2286
+ >>>
2287
+
2288
+ # Example 4: create a FeatureGroup with all the objects
2289
+ # created in the above examples and register it with
2290
+ # repo 'vfs_v1'.
2291
+ >>> # Create FeatureGroup.
2292
+ >>> from teradataml import FeatureGroup
2293
+ >>> fg = FeatureGroup('Sales',
2294
+ ... features=feature,
2295
+ ... entity=entity,
2296
+ ... data_source=ds)
2297
+ >>> # Register the above FeatureGroup with the repo.
2298
+ >>> fs.apply(fg)
2299
+ True
2300
+ """
2301
+ self._logger.info(f"Applying object to FeatureStore repository '{self.__repo}', data_domain '{self.__data_domain}', object type: {type(object).__name__}")
2302
+
2303
+ argument_validation_params = []
2304
+ argument_validation_params.append(["name", object, False, (Feature, Entity, DataSource, FeatureGroup)])
2305
+
2306
+ # Validate argument types
2307
+ _Validators._validate_function_arguments(argument_validation_params)
2308
+
2309
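+ # Delegate to the object's own publish(); each supported type knows how
+ # to register itself with the given repo and data domain.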
+ result = object.publish(self.__repo, self.__data_domain)
2310
+ self._logger.debug(f"Successfully applied {type(object).__name__} to repository, result: {result}")
2311
+ return result
2312
+
2313
+ def get_data(self, process_id=None, entity=None, features=None,
2314
+ dataset_name=None, as_of=None, include_historic_records=False):
2315
+ """
2316
+ DESCRIPTION:
2317
+ Returns teradataml DataFrame which has entities and feature values.
2318
+ The method generates the dataset from one of the following:
2319
+ * process_id
2320
+ * entity and features
2321
+ * dataset_name
2322
+
2323
+ PARAMETERS:
2324
+ process_id:
2325
+ Optional Argument.
2326
+ Either "process_id", "entity" and "features", "dataset_name" is mandatory.
2327
+ Specifies the process id of an existing feature process.
2328
+ Types: str
2329
+
2330
+ entity:
2331
+ Optional Argument.
2332
+ Specifies the name of the Entity, or an Entity object,
2333
+ to be considered in the dataset.
2334
+ Types: str or Entity.
2335
+
2336
+ features:
2337
+ Optional Argument.
2338
+ Specifies the names of Features and the corresponding feature version
2339
+ to be included in the dataset.
2340
+ Notes:
2341
+ * Key is the name of the feature and value is the version of the
2342
+ feature.
2343
+ * Look at FeatureCatalog.list_feature_versions() to get the list of
2344
+ features and their versions.
2345
+ Types: dict
2346
+
2347
+ dataset_name:
2348
+ Optional Argument.
2349
+ Specifies the dataset name.
2350
+ Types: str
2351
+
2352
+ as_of:
2353
+ Optional Argument.
2354
+ Specifies the time to retrieve the Feature Values instead of
2355
+ retrieving the latest values.
2356
+ Notes:
2357
+ * Applicable only when "process_id" is passed to the function.
2358
+ * Ignored when "dataset_name" is passed.
2359
+ Types: str or datetime.datetime
2360
+
2361
+ include_historic_records:
2362
+ Optional Argument.
2363
+ Specifies whether to include historic data in the dataset.
2364
+ Note:
2365
+ * If "as_of" is specified, then the "include_historic_records" argument is ignored.
2366
+ Default Value: False.
2367
+ Types: bool.
2368
+
2369
+ RETURNS:
2371
+ teradataml DataFrame.
2372
+
2373
+ RAISES:
2374
+ TeradataMLException
2375
+
2376
+ EXAMPLES:
2377
+ >>> from teradataml import DataFrame, FeatureStore, load_example_data
2378
+ # Create DataFrame on sales data.
2379
+ >>> load_example_data("dataframe", "sales")
2380
+ >>> df = DataFrame("sales")
2381
+ >>> df
2382
+ Feb Jan Mar Apr datetime
2383
+ accounts
2384
+ Orange Inc 210.0 NaN NaN 250.0 04/01/2017
2385
+ Jones LLC 200.0 150.0 140.0 180.0 04/01/2017
2386
+ Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
2387
+ Alpha Co 210.0 200.0 215.0 250.0 04/01/2017
2388
+ Yellow Inc 90.0 NaN NaN NaN 04/01/2017
2389
+
2390
+ >>> repo = 'vfs_v1'
2391
+ >>> data_domain = 'sales'
2392
+ >>> fs = FeatureStore(repo=repo, data_domain=data_domain)
2393
+ FeatureStore is ready to use.
2394
+
2395
+ # Example 1: Get the data from process_id.
2396
+ >>> fp = FeatureProcess(repo=repo,
2397
+ ... data_domain=data_domain,
2398
+ ... object=df,
2399
+ ... entity='accounts',
2400
+ ... features=['Jan', 'Feb'])
2401
+ >>> fp.run()
2402
+ Process '1e9e8d64-6851-11f0-99c5-a30631e77953' started.
2403
+ Process '1e9e8d64-6851-11f0-99c5-a30631e77953' completed.
2404
+ True
2405
+
2406
+ >>> fs.get_data(process_id=fp.process_id)
2407
+ accounts Feb Jan
2408
+ 0 Alpha Co 210.0 200.0
2409
+ 1 Blue Inc 90.0 50.0
2410
+ 2 Jones LLC 200.0 150.0
2411
+ 3 Orange Inc 210.0 NaN
2412
+ 4 Yellow Inc 90.0 NaN
2413
+ 5 Red Inc 200.0 150.0
2414
+
2415
+ # Example 2: Get the data from entity and features.
2416
+ >>> fs.get_data(entity='accounts', features={'Jan': fp.process_id})
2417
+ accounts Jan
2418
+ 0 Alpha Co 200.0
2419
+ 1 Blue Inc 50.0
2420
+ 2 Jones LLC 150.0
2421
+ 3 Orange Inc NaN
2422
+ 4 Yellow Inc NaN
2423
+ 5 Red Inc 150.0
2424
+
2425
+ # Example 3: Get the data from dataset name.
2426
+ >>> dc = DatasetCatalog(repo=repo, data_domain=data_domain)
2427
+ >>> dc.build_dataset(entity='accounts',
2428
+ ... selected_features={'Jan': fp.process_id,
2429
+ ... 'Feb': fp.process_id},
2430
+ ... view_name='test_get_data',
2431
+ ... description='Dataset with Jan and Feb')
2432
+ >>> fs.get_data(dataset_name='test_get_data')
2433
+ accounts Feb Jan
2434
+ 0 Alpha Co 210.0 200.0
2435
+ 1 Blue Inc 90.0 50.0
2436
+ 2 Jones LLC 200.0 150.0
2437
+ 3 Orange Inc 210.0 NaN
2438
+ 4 Yellow Inc 90.0 NaN
2439
+ 5 Red Inc 200.0 150.0
2440
+
2441
+
2442
+ # Example 4: Get the data from Entity and Features, where the entity
2443
+ # object and the feature objects are passed to the entity and
2444
+ # features arguments.
2445
+ >>> # Create features.
2446
+ >>> feature1 = Feature('sales:Mar',
2447
+ ... df.Mar,
2448
+ ... feature_type=FeatureType.CATEGORICAL)
2449
+
2450
+ >>> feature2 = Feature('sales:Apr',
2451
+ ... df.Apr,
2452
+ ... feature_type=FeatureType.CONTINUOUS)
2453
+
2454
+ >>> # Create entity.
2455
+ >>> entity = Entity(name='accounts_entity', columns=['accounts'])
2456
+
2457
+ >>> fp1 = FeatureProcess(repo=repo,
2458
+ ... data_domain=data_domain,
2459
+ ... object=df,
2460
+ ... entity=entity,
2461
+ ... features=[feature1, feature2])
2462
+ >>> fp1.run()
2463
+ Process '5522c034-684d-11f0-99c5-a30631e77953' started.
2464
+ Process '5522c034-684d-11f0-99c5-a30631e77953' completed.
2465
+ True
2466
+
2467
+ >>> fs.get_data(entity=entity, features={feature1.name: fp1.process_id,
2468
+ ... feature2.name: fp1.process_id})
2469
+ accounts sales:Mar sales:Apr
2470
+ 0 Alpha Co 215.0 250.0
2471
+ 1 Blue Inc 95.0 101.0
2472
+ 2 Jones LLC 140.0 180.0
2473
+ 3 Orange Inc NaN 250.0
2474
+ 4 Yellow Inc NaN NaN
2475
+ 5 Red Inc 140.0 NaN
2476
+
2477
+ # Example 5: Get the data for the time passed by the user via the as_of argument.
2478
+ >>> import time
2479
+ >>> from datetime import datetime as dt, date as d
2480
+
2481
+ # Retrieve the record where accounts == 'Blue Inc'.
2482
+ >>> df_test = df[df['accounts'] == 'Blue Inc']
2483
+ >>> df_test
2484
+ Feb Jan Mar Apr datetime
2485
+ accounts
2486
+ Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
2487
+
2488
+ # The example updates the data, hence a new table is created to avoid modifying the existing table's data.
2489
+ >>> df_test.to_sql('sales_test', if_exists='replace')
2490
+ >>> test_df = DataFrame('sales_test')
2491
+ >>> test_df
2492
+ accounts Feb Jan Mar Apr datetime
2493
+ 0 Blue Inc 90.0 50 95 101 17/01/04
2494
+
2495
+ >>> # Create a feature process.
2496
+ >>> fp = FeatureProcess(repo=repo,
2497
+ ... data_domain=data_domain,
2498
+ ... object=test_df,
2499
+ ... entity='accounts',
2500
+ ... features=['Jan', 'Feb'])
2501
+
2502
+ >>> # Run the feature process
2503
+ >>> fp.run()
2504
+ Process '6cb49b4b-79d4-11f0-8c5e-b0dcef8381ea' started.
2505
+ Process '6cb49b4b-79d4-11f0-8c5e-b0dcef8381ea' completed.
2506
+ True
2507
+
2508
+ >>> # Running the same process more than once to demonstrate how the user
2509
+ >>> # can retrieve a specific version of Features using the argument 'as_of'.
2510
+ >>> # Wait for 20 seconds. Then update the data. Then run again.
2511
+ >>> time.sleep(20)
2512
+ >>> execute_sql("update sales_test set Jan = Jan * 10, Feb = Feb * 10")
2513
+ TeradataCursor uRowsHandle=269 bClosed=False
2514
+
2515
+ >>> # Run the feature process again.
2516
+ >>> fp.run()
2517
+ Process '6cb49b4b-79d4-11f0-8c5e-b0dcef8381ea' started.
2518
+ Process '6cb49b4b-79d4-11f0-8c5e-b0dcef8381ea' completed.
2519
+ True
2520
+
2521
+ >>> # Then again wait for 20 seconds. Then update the data. Then run again.
2522
+ >>> time.sleep(20)
2523
+ >>> execute_sql("update sales_test set Jan = Jan * 10, Feb = Feb * 10")
2524
+ TeradataCursor uRowsHandle=397 bClosed=False
2525
+
2526
+ >>> # Run the feature process again.
2527
+ >>> fp.run()
2528
+ Process '6cb49b4b-79d4-11f0-8c5e-b0dcef8381ea' started.
2529
+ Process '6cb49b4b-79d4-11f0-8c5e-b0dcef8381ea' completed.
2530
+ True
2531
+
2532
+ # Retrieve the version of the Features as of '2025-08-15 12:37:23'.
2533
+ >>> as_of_time = dt(2025, 8, 15, 12, 37, 23)
2534
+
2535
+ >>> # time passed to as_of in datetime.datetime format.
2536
+ >>> fs.get_data(process_id=fp.process_id,
2537
+ ... as_of=as_of_time)
2538
+ accounts Feb Jan
2539
+ 0 Blue Inc 900.0 500
2540
+
2541
+ >>> # time passed to as_of in string format.
2542
+ >>> fs.get_data(process_id=fp.process_id,
2543
+ ... as_of=as_of_time.strftime('%Y-%m-%d %H:%M:%S'))
2544
+ accounts Feb Jan
2545
+ 0 Blue Inc 900.0 500
2546
+
2547
+ # Example 6: Get the data for the time passed by the user via the as_of argument
2548
+ # using entity and features instead of a process id.
2549
+ >>> # time passed to as_of in datetime.datetime format.
2550
+ >>> fs.get_data(entity='accounts',
2551
+ ... features={'Feb': fp.process_id,
2552
+ ... 'Jan': fp.process_id},
2553
+ ... as_of=as_of_time)
2554
+ accounts Feb Jan
2555
+ 0 Blue Inc 900.0 500
2556
+
2557
+ >>> # time passed to as_of in string format.
2558
+ >>> fs.get_data(entity='accounts',
2559
+ ... features={'Feb': fp.process_id,
2560
+ ... 'Jan': fp.process_id},
2561
+ ... as_of=as_of_time.strftime('%Y-%m-%d %H:%M:%S'))
2562
+ accounts Feb Jan
2563
+ 0 Blue Inc 900.0 500
2564
+
2565
+ # Example 7: Get the latest data for the given process_id.
2566
+ >>> fs.get_data(process_id=fp.process_id, include_historic_records=False)
2567
+ accounts Feb Jan
2568
+ 0 Blue Inc 9000.0 5000
2569
+
2570
+ # Example 8: Get the historic data for the given process_id.
2571
+ >>> fs.get_data(process_id=fp.process_id, include_historic_records=True)
2572
+ accounts Feb Jan
2573
+ 0 Blue Inc 9000.0 5000
2574
+ 1 Blue Inc 90.0 50
2575
+ 2 Blue Inc 90.0 5000
2576
+ 3 Blue Inc 900.0 500
2577
+ 4 Blue Inc 900.0 5000
2578
+ 5 Blue Inc 900.0 50
2579
+ 6 Blue Inc 90.0 500
2580
+ 7 Blue Inc 9000.0 50
2581
+ 8 Blue Inc 9000.0 500
2582
+
2583
+ # Example 9: Get the latest data for the given feature.
2584
+ >>> fs.get_data(entity='accounts', features={'Feb': fp.process_id}, include_historic_records=False)
2585
+ accounts Feb
2586
+ 0 Blue Inc 9000.0
2587
+
2588
+ # Example 10: Get the historic data for the given feature.
2589
+ >>> fs.get_data(entity='accounts', features={'Feb': fp.process_id}, include_historic_records=True)
2590
+ accounts Feb
2591
+ 0 Blue Inc 900.0
2592
+ 1 Blue Inc 90.0
2593
+ 2 Blue Inc 9000.0
2594
+
2595
+ """
2596
+ self._logger.info(f"Getting data from repository '{self.__repo}', data_domain '{self.__data_domain}', process_id: {process_id}, entity: {entity}, features: {features}, dataset_name: {dataset_name}, as_of: {as_of}, include_historic_records: {include_historic_records}")
2597
+
2598
+ # Validate argument types
2599
+ args = []
2600
+ args.append(["process_id", process_id, True, (str), True])
2601
+ args.append(["entity", entity, True, (Entity, str), True])
2602
+ args.append(["features", features, True, (dict), True])
2603
+ args.append(["dataset_name", dataset_name, True, (str), True])
2604
+ args.append(["as_of", as_of, True, (str, dt), True])
2605
+ args.append(["include_historic_records", include_historic_records, True, (bool)])
2606
+
2607
+ _Validators._validate_function_arguments(args)
2608
+
2609
+ # Validate mutually exclusive arguments.
2610
+ _Validators._validate_mutually_exclusive_argument_groups({"process_id": process_id},
2611
+ {"dataset_name": dataset_name},
2612
+ {"entity": entity, "features": features})
2613
+
2614
+ # Validate whether entity and features are mutually inclusive.
2615
+ _Validators._validate_mutually_inclusive_arguments(entity, "entity",
2616
+ features, "features")
2617
+
2618
+ # Validate at least one argument is passed.
2619
+ _Validators._validate_any_argument_passed({"process_id": process_id,
2620
+ "entity' and 'features": entity,
2621
+ "dataset_name": dataset_name})
2622
+
2623
+ # If user passes a dataset name, return its DataFrame directly.
2624
+ if dataset_name:
2625
+ return DataFrame(in_schema(self.__repo, dataset_name))
2626
+
2627
+ if process_id:
2628
+ entity, features = (
2629
+ self.__get_entity_and_features_from_process_id(process_id))
2630
+
2631
+ # Generate the view name.
2632
+ view_name = UtilFuncs._generate_temp_table_name(databasename=self.__repo)
2633
+
2634
+ # When as_of is not None, get all the data instead of only latest.
2635
+ if as_of:
2636
+ include_historic_records = True
2637
+
2638
+ # Create the DatasetCatalog and build dataset on top of it.
2639
+ dc = DatasetCatalog(repo=self.__repo, data_domain=self.__data_domain)
2640
+ dataset = dc._build_dataset(
2641
+ entity, features,
2642
+ include_historic_records=include_historic_records,
2643
+ include_time_series=bool(as_of),
2644
+ view_name=view_name,
2645
+ temporary=True)
2646
+
2647
+ if as_of:
2648
+ return self.__filter_dataset_by_as_of(dataset, entity, list(features.keys()), as_of)
2649
+ return dataset
2650
+
2651
+ def __get_entity_and_features_from_process_id(self, process_id):
2652
+ """
2653
+ DESCRIPTION:
2654
+ Internal function to get the entity id and the selected
+ features for a given process_id.
2656
+
2657
+ PARAMETERS:
2658
+ process_id:
2659
+ Required Argument.
2660
+ Specifies the process id of FeatureProcess.
2661
+ Types: str
2662
+
2663
+ RETURNS:
2664
+ entity_id, selected_features
2665
+
2666
+ RAISES:
2667
+ None
2668
+
2669
+ EXAMPLES:
2670
+ >>> fs.__get_entity_and_features_from_process_id('123-acd')
2671
+ """
2672
+ feature_ver = self.__get_feature_version()
2673
+ feature_ver = feature_ver[feature_ver["feature_version"] == process_id]
2674
+
2675
+ # Check if a feature with that process id exists or not. If not, raise error.
2676
+ if feature_ver.shape[0] == 0:
2677
+ res = _FSUtils._get_data_domains(self.__repo, process_id, 'feature_version')
2678
+ if res:
2679
+ msg_code = MessageCodes.EFS_OBJECT_IN_OTHER_DOMAIN
2680
+ error_msg = Messages.get_message(msg_code, "Feature", "process id '{}'".format(process_id),
2681
+ self.__data_domain, res)
2682
+ else:
2683
+ msg_code = MessageCodes.EFS_OBJECT_NOT_EXIST
2684
+ error_msg = Messages.get_message(msg_code, "Feature", "process id '{}'".format(process_id),
2685
+ self.__data_domain)
2686
+ raise TeradataMlException(error_msg, msg_code)
2687
+
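+ # All rows of a process share one entity; map each feature name
+ # produced by the process back to the same process_id.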
2688
+ selected_features = {}
2689
+ for f_ver in feature_ver.itertuples():
2690
+ entity_id = f_ver.entity_id
2691
+ selected_features[f_ver.feature_name] = process_id
2692
+ return entity_id, selected_features
2693
+
2694
+ def __filter_dataset_by_as_of(self, dataset, entity_column, features_column_list, as_of):
2695
+ """
2696
+ DESCRIPTION:
2697
+ Internal function to filter the dataset using as_of and
2698
+ return only required columns.
2699
+
2700
+ PARAMETERS:
2701
+ dataset:
2702
+ Required Argument.
2703
+ Specifies the teradataml DataFrame.
2704
+ Types: teradataml DataFrame
2705
+
2706
+ entity_column:
2707
+ Required Argument.
2708
+ Specifies the column name of entity.
2709
+ Types: str
2710
+
2711
+ features_column_list:
2712
+ Required Argument.
2713
+ Specifies the list of feature column names.
2714
+ Types: list of str
2715
+
2716
+ as_of:
2717
+ Required Argument.
2718
+ Specifies the time to retrieve the Feature Values instead of
2719
+ retrieving the latest values.
2720
+ Notes:
2721
+ * Applicable only when "process_id" is passed to the function.
2722
+ * Ignored when "dataset_name" is passed.
2723
+ Types: str or datetime.datetime
2724
+
2725
+ RETURNS:
2726
+ teradataml DataFrame
2727
+
2728
+ RAISES:
2729
+ None
2730
+
2731
+ EXAMPLES:
2732
+ >>> load_examples_data("dataframe", "sales")
2733
+ >>> df = DataFrame("sales")
2734
+ >>> fs.__filter_dataset_by_as_of(df, "accounts", ["Jan", "Feb"], datetime.datetime(2025, 1, 1))
2735
+
2736
+ """
2737
+ conditions = [
2738
+ (dataset[f"{f}_start_time"] <= as_of) & (as_of <= dataset[f"{f}_end_time"])
2739
+ for f in features_column_list
2740
+ ]
2741
+ combined_condition = reduce(operator.and_, conditions)
2742
+ required_columns = UtilFuncs._as_list(entity_column) + features_column_list
2743
+ return dataset[combined_condition].select(required_columns)
2744
+
2745
+ def __get_feature_group_names(self, name, type_):
2746
+ """
2747
+ DESCRIPTION:
2748
+ Internal function to get the associated group names for a
+ Feature, DataSource, or Entity.
2750
+
2751
+ PARAMETERS:
2752
+ name:
2753
+ Required Argument.
2754
+ Specifies the name of the Feature or DataSource or Entity.
2755
+ Types: str
2756
+
2757
+ type_:
2758
+ Required Argument.
2759
+ Specifies the type of the objects stored in feature store.
2760
+ Permitted Values:
2761
+ * feature
2762
+ * data_source
2763
+ * entity
2764
+ Types: str
2765
+
2766
+ RETURNS:
2767
+ list
2768
+
2769
+ RAISES:
2770
+ None
2771
+
2772
+ EXAMPLES:
2773
+ >>> self.__get_feature_group_names('admissions', 'data_source')
2774
+ """
2775
+ if type_ == "feature":
2776
+ df = self.__get_features_df()
2777
+ return [rec.group_name for rec in df[df.name == name].itertuples() if rec.group_name is not None]
2778
+ elif type_ == "data_source":
2779
+ df = self.__get_feature_group_df()
2780
+ return [rec.name for rec in df[df.data_source_name == name].itertuples()]
2781
+ elif type_ == "entity":
2782
+ df = self.__get_feature_group_df()
2783
+ return [rec.name for rec in df[df.entity_name == name].itertuples()]
2784
+
2785
+ def __remove_obj(self, name, type_, action="archive"):
2786
+ """
2787
+ DESCRIPTION:
2788
+ Internal function to remove a Feature, DataSource, or
+ Entity from the repo.
2790
+
2791
+ PARAMETERS:
2792
+ name:
2793
+ Required Argument.
2794
+ Specifies the name of the Feature or DataSource or Entity.
2795
+ Types: str
2796
+
2797
+ type_:
2798
+ Required Argument.
2799
+ Specifies the type of "name".
2800
+ Types: str
2801
+ Permitted Values:
2802
+ * feature
2803
+ * data_source
2804
+ * entity
2805
+
2806
+ action:
2807
+ Optional Argument.
2808
+ Specifies the removal action.
+ When set to "delete", the object is removed from the staging tables.
+ Otherwise, the object is removed from the regular tables.
+ Default Value: "archive"
+ Permitted Values: "archive", "delete"
+ Types: str
2813
+
2814
+ RETURNS:
2815
+ bool
2816
+
2817
+ RAISES:
2818
+ None
2819
+
2820
+ EXAMPLES:
2821
+ >>> self.__remove_obj('admissions', 'data_source')
2822
+ """
2823
+ self._logger.debug(f"Removing object '{name}' of type '{type_}' with action '{action}' from repository '{self.__repo}', data_domain '{self.__data_domain}'")
2824
+
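+ # Dispatch table mapping each object type to its class (for argument
+ # validation) and to the remediation hint used in error messages.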
2825
+ _vars = {
2826
+ "data_source": {"class": DataSource, "error_msg": "Update these FeatureGroups with other DataSources"},
2827
+ "entity": {"class": Entity, "error_msg": "Update these FeatureGroups with other Entities"},
2828
+ "feature": {"class": Feature, "error_msg": "Remove the Feature from FeatureGroup"},
2829
+ }
2830
+ c_name_ = _vars[type_]["class"].__name__
2831
+ argument_validation_params = []
2832
+ argument_validation_params.append([type_, name, False, (str, _vars[type_]["class"]), True])
2833
+
2834
+ # Validate argument types
2835
+ _Validators._validate_function_arguments(argument_validation_params)
2836
+ # Extract the name if argument is class type.
2837
+ if isinstance(name, _vars[type_]["class"]):
2838
+ self._logger.debug(f"Extracted name '{name.name}' from {type_} object")
2839
+ name = name.name
2840
+
2841
+ # Get the feature info DataFrame.
2842
+ feature_info_df = self.__get_feature_info_df()
2843
+
2844
+ # Before removing it, check if it is associated with any FeatureGroup.
2845
+ # If yes, raise error. Applicable only for Archive.
2846
+ if action == "archive":
2847
+ self._logger.debug(f"Checking if {type_} '{name}' is associated with feature groups before archiving")
2848
+ feature_groups = self.__get_feature_group_names(name, type_)
2849
+ if feature_groups:
2850
+ feature_groups_str = ", ".join(("'{}'".format(fg) for fg in feature_groups))
2851
+ self._logger.debug(f"{c_name_} '{name}' is associated with FeatureGroups: {feature_groups_str}")
2852
+ message = ("{} '{}' is associated with FeatureGroups {}. {} and try deleting again.".format(
2853
+ c_name_, name, feature_groups_str, _vars[type_]["error_msg"]))
2854
+ raise TeradataMlException(Messages.get_message(
2855
+ MessageCodes.FUNC_EXECUTION_FAILED, '{}_{}'.format(action, type_), message),
2856
+ MessageCodes.FUNC_EXECUTION_FAILED)
2857
+ # Check if the feature or entity exists in Feature metadata table.
2858
+ # If yes, then raise error. Applicable only for Archive.
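+ # Maps object type to (metadata column to match, error code raised when
+ # the object is still referenced by the feature catalog).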
2859
+ info_checks = {
2860
+ 'feature': ('name', MessageCodes.EFS_FEATURE_IN_CATALOG),
2861
+ 'entity': ('entity_name', MessageCodes.EFS_ENTITY_IN_CATALOG)
2862
+ }
2863
+ if type_ in info_checks:
2864
+ col, error_code = info_checks[type_]
2865
+ validate_df = feature_info_df[feature_info_df[col].isin([name])]
2866
+ if validate_df.shape[0] > 0:
2867
+ self._logger.debug(f"{c_name_} '{name}' exists in feature catalog and cannot be archived")
2868
+ if type_ == "entity":
2869
+ related_features = [feature.name for feature in validate_df.itertuples()]
2870
+ features = ", ".join(("'{}'".format(f) for f in related_features))
2871
+ err_msg = Messages.get_message(error_code,
2872
+ name,
2873
+ features)
2874
+ else:
2875
+ err_msg = Messages.get_message(error_code,
2876
+ name)
2877
+ raise TeradataMlException(err_msg, error_code)
2878
+
2879
+ stg_table = _FeatureStoreDFContainer.get_df("{}_staging".format(type_), self.__repo, self.__data_domain)
2880
+ stg_table = stg_table[stg_table.name == name]
2881
+ if stg_table.shape[0] > 0:
2882
+ self._logger.info(f"{c_name_} '{name}' is already archived")
2883
+ print("{} '{}' is already archived.".format(c_name_, name))
2884
+ return False
2885
+
2886
+ # Validation for delete action - ensure object is already archived
2887
+ if action == "delete":
2888
+ self._logger.debug(f"Validating {type_} '{name}' is archived before deletion")
2889
+ # Check if object exists in main table (not archived)
2890
2891
+ main_df = _FeatureStoreDFContainer.get_df(type_, self.__repo, self.__data_domain)
2892
+ existing_records = main_df[(main_df["name"] == name)]
2893
+
2894
+ if existing_records.shape[0] > 0:
2895
+ self._logger.debug(f"{c_name_} '{name}' must be archived before deletion")
2896
+ error_code = MessageCodes.EFS_DELETE_BEFORE_ARCHIVE
2897
+ error_msg = Messages.get_message(error_code,
2898
+ c_name_,
2899
+ name,
2900
+ type_)
2901
+ raise TeradataMlException(error_msg, error_code)
2902
+
2903
+ if type_ == "entity":
2904
+ self._logger.debug(f"Removing entity '{name}' using specialized entity removal method")
2905
+ res = self._remove_entity(name, action)
2906
+ else:
2907
+ table_name = self.__table_names[type_]
2908
+ if action == "delete":
2909
+ table_name = self.__table_names["{}_staging".format(type_)]
2910
+
2911
+ self._logger.debug(f"Removing {type_} '{name}' from table '{table_name}'")
2912
+ res = _delete_data(table_name=table_name,
2913
+ schema_name=self.__repo,
2914
+ delete_conditions=(Col("name") == name) &
2915
+ (Col("data_domain") == self.__data_domain)
2916
+ )
2917
+
2918
+ if res == 1:
2919
+ self._logger.info(f"{c_name_} '{name}' successfully {action}d")
2920
+ print("{} '{}' is {}d.".format(c_name_, name, action))
2921
+ return True
2922
+ else:
2923
+ self._logger.debug(f"{c_name_} '{name}' does not exist to {action}")
2924
+ print("{} '{}' does not exist to {}.".format(c_name_, name, action))
2925
+ return False
2926
+
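+ # _remove_entity deletes from both the xref and the entity tables; the
+ # db_transaction decorator runs the two deletes in a single transaction.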
2927
+ @db_transaction
2928
+ def _remove_entity(self, name, action):
2929
+ """
2930
+ DESCRIPTION:
2931
+ Internal function to remove an Entity from the repo.
2932
+
2933
+ PARAMETERS:
2934
+ name:
2935
+ Required Argument.
2936
+ Specifies the name of the Entity.
2937
+ Types: str
2938
+
2939
+ action:
2940
+ Required Argument.
2941
+ Specifies the removal action.
2942
+ When set to "delete", Entity is removed from staging tables.
2943
+ Otherwise, Entity is removed from regular tables.
2944
+ Types: str
2945
+
2946
+ RETURNS:
2947
+ bool
2948
+
2949
+ RAISES:
2950
+ None
2951
+
2952
+ EXAMPLES:
2953
+ >>> self._remove_entity('admissions', 'delete')
2954
+ """
2955
+ self._logger.debug(f"Removing entity '{name}' from repository '{self.__repo}', data_domain '{self.__data_domain}', action: {action}")
2956
+
2957
+ ent_table = self.__table_names["entity"]
2958
+ ent_table_xref = self.__table_names["entity_xref"]
2959
+ if action == "delete":
2960
+ ent_table = self.__table_names["entity_staging"]
2961
+ ent_table_xref = self.__table_names["entity_staging_xref"]
2962
+
2963
+ # remove it from xref table first.
2964
+ self._logger.debug(f"Removing entity '{name}' from {ent_table_xref} table")
2965
+ _delete_data(table_name=ent_table_xref,
2966
+ schema_name=self.__repo,
2967
+ delete_conditions=(Col("entity_name") == name) &
2968
+ (Col("data_domain") == self.__data_domain)
2969
+ )
2970
+
2971
+ # remove from entity table.
2972
+ self._logger.debug(f"Removing entity '{name}' from {ent_table} table")
2973
+ res = _delete_data(table_name=ent_table,
2974
+ schema_name=self.__repo,
2975
+ delete_conditions=(Col("name") == name) &
2976
+ (Col("data_domain") == self.__data_domain)
2977
+ )
2978
+
2979
+ return res
2980
+
2981
+ def archive_data_source(self, data_source):
2982
+ """
2983
+ DESCRIPTION:
2984
+ Archives DataSource from repository. Note that archived DataSource
2985
+ is not available for any further processing. Archived DataSource can be
2986
+ viewed using "list_data_sources(archived=True)" method.
2987
+
2988
+ PARAMETERS:
2989
+ data_source:
2990
+ Required Argument.
2991
+ Specifies either the name of DataSource or Object of DataSource
2992
+ to archive from repository.
2993
+ Types: str OR DataSource
2994
+
2995
+ RETURNS:
2996
+ bool
2997
+
2998
+ RAISES:
2999
+ TeradataMLException, TypeError, ValueError
3000
+
3001
+ EXAMPLES:
3002
+ >>> from teradataml import DataFrame, DataSource, FeatureStore
3003
+ # Create FeatureStore for repo 'vfs_v1'.
3004
+ >>> fs = FeatureStore("vfs_v1")
3005
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
3006
+ # Setup FeatureStore for this repository.
3007
+ >>> fs.setup()
3008
+ True
3009
+
3010
+ # Example 1: Archive the DataSource 'sales_data' in the repo 'vfs_v1' using DataSource object.
3011
+ # Create a DataSource using SELECT statement.
3012
+ >>> ds = DataSource(name="sales_data", source="select * from sales")
3013
+ # Apply DataSource to FeatureStore.
3014
+ >>> fs.apply(ds)
3015
+ True
3016
+
3017
+ # List the available DataSources.
3018
+ >>> fs.list_data_sources()
3019
+ description timestamp_column source creation_time modified_time
3020
+ name data_domain
3021
+ sales_data ALICE None None select * from sales 2025-07-28 04:24:48.117827 None
3022
+
3023
+ # Archive DataSource with name "sales_data".
3024
+ >>> fs.archive_data_source("sales_data")
3025
+ DataSource 'sales_data' is archived.
3026
+ True
3027
+
3028
+ # List the available DataSources after archive.
3029
+ >>> fs.list_data_sources(archived=True)
3030
+ name data_domain description timestamp_column source creation_time modified_time archived_time
3031
+ 0 sales_data ALICE None None select * from sales 2025-07-28 04:24:48.117827 None 2025-07-28 04:25:55.430000
3032
+
3033
+ # Example 2: Archive the DataSource 'sales_data' in the repo 'vfs_v1' using DataSource name.
3034
+ # Create a DataSource using teradataml DataFrame.
3035
+ >>> from teradataml import DataFrame
3036
+ >>> load_example_data('dataframe', ['sales'])
3037
+ >>> df = DataFrame("sales")
3038
+ >>> ds2 = DataSource(name="sales_data_df", source=df)
3039
+
3040
+ # Apply DataSource to FeatureStore.
3041
+ >>> fs.apply(ds2)
3042
+ True
3043
+
3044
+ # Archive DataSource with name "sales_data_df".
3045
+ >>> fs.archive_data_source("sales_data_df")
3046
+ DataSource 'sales_data_df' is archived.
3047
+ True
3048
+
3049
+ # List the available DataSources after archive.
3050
+ >>> fs.list_data_sources(archived=True)
3051
+ name data_domain description timestamp_column source creation_time modified_time archived_time
3052
+ 0 sales_data ALICE None None select * from sales 2025-07-28 04:24:48.117827 None 2025-07-28 04:25:55.430000
3053
+ 1 sales_data_df ALICE None None select * from sales 2025-07-28 04:26:10.123456 None 2025-07-28 04:26:45.456789
3054
+
3055
+
3056
+ """
3057
+ self._logger.info(f"Archiving data source '{data_source}' from repository '{self.__repo}', data_domain '{self.__data_domain}'")
3058
+ return self.__remove_obj(name=data_source, type_="data_source")
3059
+
3060
+ def delete_data_source(self, data_source):
3061
+ """
3062
+ DESCRIPTION:
3063
+ Removes the archived DataSource from repository.
3064
+
3065
+ PARAMETERS:
3066
+ data_source:
3067
+ Required Argument.
3068
+ Specifies either the name of DataSource or Object of DataSource
3069
+ to remove from repository.
3070
+ Types: str OR DataSource
3071
+
3072
+ RETURNS:
3073
+ bool.
3074
+
3075
+ RAISES:
3076
+ TeradataMLException, TypeError, ValueError
3077
+
3078
+ EXAMPLES:
3079
+ >>> from teradataml import DataFrame, DataSource, FeatureStore, load_example_data
3080
+ # Create teradataml DataFrame.
3081
+ >>> load_example_data('dataframe', ['sales'])
3082
+ >>> df = DataFrame("sales")
3083
+
3084
+ # Create FeatureStore for repo 'vfs_v1'.
3085
+ >>> fs = FeatureStore("vfs_v1")
3086
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
3087
+ # Setup FeatureStore for this repository.
3088
+ >>> fs.setup()
3089
+ True
3090
+
3091
+ # Create DataSource with source as teradataml DataFrame.
3092
+ >>> ds = DataSource(name="sales_data", source=df)
3093
+ # Apply the DataSource to FeatureStore.
3094
+ >>> fs.apply(ds)
3095
+ True
3096
+
3097
+ # Let's first archive the DataSource.
3098
+ >>> fs.archive_data_source("sales_data")
3099
+ DataSource 'sales_data' is archived.
3100
+ True
3101
+
3102
+ # Delete DataSource with name "sales_data".
3103
+ >>> fs.delete_data_source("sales_data")
3104
+ DataSource 'sales_data' is deleted.
3105
+ True
3106
+
3107
+ # List the available DataSources after delete.
3108
+ >>> fs.list_data_sources()
3109
+ Empty DataFrame
3110
+ Columns: [description, timestamp_column, source, creation_time, modified_time]
3111
+ Index: []
3112
+ """
3113
+ self._logger.info(f"Deleting data source '{data_source}' from repository '{self.__repo}', data_domain '{self.__data_domain}'")
3114
+ return self.__remove_obj(name=data_source, type_="data_source", action="delete")
3115
+
3116
+ def archive_feature(self, feature):
3117
+ """
3118
+ DESCRIPTION:
3119
+ Archives Feature from repository. Note that archived Feature
3120
+ is not available for any further processing. Archived Feature can be
3121
+ viewed using "list_features(archived=True)" method.
3122
+
3123
+ PARAMETERS:
3124
+ feature:
3125
+ Required Argument.
3126
+ Specifies either the name of Feature or Object of Feature
3127
+ to archive from repository.
3128
+ Types: str OR Feature
3129
+
3130
+ RETURNS:
3131
+ bool
3132
+
3133
+ RAISES:
3134
+ TeradataMLException, TypeError, ValueError
3135
+
3136
+ EXAMPLES:
3137
+ >>> from teradataml import DataFrame, Feature, FeatureStore
3138
+ # Create teradataml DataFrame.
3139
+ >>> load_example_data('dataframe', ['sales'])
3140
+ >>> df = DataFrame("sales")
3141
+
3142
+ # Create FeatureStore for repo 'vfs_v1'.
3143
+ >>> fs = FeatureStore("vfs_v1")
3144
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
3145
+ # Setup FeatureStore for this repository.
3146
+ >>> fs.setup()
3147
+ True
3148
+
3149
+ # Example 1: Archive the Feature 'sales_data_Feb' in the repo 'vfs_v1' using Feature object.
3150
+ # Create Feature for Column 'Feb'.
3151
+ >>> feature = Feature(name="sales_data_Feb", column=df.Feb)
3152
+ # Apply the Feature to FeatureStore.
3153
+ >>> fs.apply(feature)
3154
+ True
3155
+
3156
+ # List the available Features.
3157
+ >>> fs.list_features()
3158
+ id column_name description tags data_type feature_type status creation_time modified_time group_name
3159
+ name data_domain
3160
+ sales_data_Feb ALICE 1 Feb None None FLOAT CONTINUOUS ACTIVE 2025-07-28 04:41:01.641026 None None
3161
+
3162
+ # Archive Feature with name "sales_data_Feb".
3163
+ >>> fs.archive_feature(feature=feature)
3164
+ Feature 'sales_data_Feb' is archived.
3165
+ True
3166
+
3167
+ # List the available archived Features.
3168
+ >>> fs.list_features(archived=True)
3169
+ id name data_domain column_name description tags data_type feature_type status creation_time modified_time archived_time group_name
3170
+ 0 1 sales_data_Feb ALICE Feb None None FLOAT CONTINUOUS ACTIVE 2025-07-28 04:41:01.641026 None 2025-07-28 04:41:35.600000 None
3171
+
3172
+ # Example 2: Archive the Feature 'sales_data_Jan' in the repo 'vfs_v1' using the feature name.
3173
+ # Create Feature for Column 'Jan'.
3174
+ >>> feature2 = Feature(name="sales_data_Jan", column=df.Jan)
3175
+ # Apply the Feature to FeatureStore.
3176
+ >>> fs.apply(feature2)
3177
+ True
3178
+
3179
+ # Archive Feature with name "sales_data_Jan".
3180
+ >>> fs.archive_feature(feature="sales_data_Jan")
3181
+ Feature 'sales_data_Jan' is archived.
3182
+ True
3183
+
3184
+ # List the available archived Features.
3185
+ >>> fs.list_features(archived=True)
3186
+ id name data_domain column_name description tags data_type feature_type status creation_time modified_time archived_time group_name
3187
+ 0 1 sales_data_Feb ALICE Feb None None FLOAT CONTINUOUS ACTIVE 2025-07-28 04:41:01.641026 None 2025-07-28 04:41:35.600000 None
3188
+ 1 2 sales_data_Jan ALICE Jan None None FLOAT CONTINUOUS ACTIVE 2025-07-28 04:42:01.641026 None 2025-07-28 04:43:35.600000 None
3189
+
3190
+ """
3191
+ self._logger.info(f"Archiving feature from repository '{self.__repo}', data_domain '{self.__data_domain}', feature: {feature}")
3192
+ result = self.__remove_obj(name=feature, type_="feature")
3193
+ self._logger.debug(f"Feature archive operation completed with result: {result}")
3194
+ return result
3195
+
3196
+ def delete(self, force=False):
3197
+ """
3198
+ DESCRIPTION:
3199
+ Removes the FeatureStore and its components from repository.
3200
+ Notes:
3201
+ * The function removes all the associated database objects along with data.
3202
+ Be cautious while using this function.
3203
+ * The function also tries to remove the underlying Database once
3204
+ all the Feature Store objects are removed.
3205
+ * The user must have permission on the database used by this Feature Store
3206
+ * to drop triggers.
3207
+ * to drop the tables.
3208
+ * to drop the Database.
3209
+ * If the user lacks any of the mentioned permissions, Teradata recommends
3210
+ not using this function.
3211
+
3212
+ PARAMETERS:
3213
+ force:
3214
+ Optional Argument.
3215
+ Specifies whether to forcefully delete feature store or not.
3216
+ When set to True, the delete() method proceeds to drop objects
+ even if a previous step errors out. Otherwise, the delete() method
+ raises the exception at the first error and does not proceed to
+ remove other objects.
3220
+ Default Value: False
3221
+ Types: bool
3222
+
3223
+ RETURNS:
3224
+ bool.
3225
+
3226
+ RAISES:
3227
+ None
3228
+
3229
+ EXAMPLES:
3230
+ # Setup FeatureStore for repo 'vfs_v1'.
3231
+ >>> from teradataml import FeatureStore
3232
+ >>> fs = FeatureStore("vfs_v1")
3233
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
3234
+
3235
+ # Setup FeatureStore.
3236
+ >>> fs.setup()
3237
+ True
3238
+
3239
+ # Delete the FeatureStore and all its components.
3240
+ >>> fs.delete()
3241
+ The function removes Feature Store and drops the corresponding repo also. Are you sure you want to proceed? (Y/N): Y
3242
+ True
3243
+
3244
+ # Forcefully delete the FeatureStore and all its components.
3245
+ >>> fs.delete(force=True)
3246
+ The function removes Feature Store and drops the corresponding repo also. Are you sure you want to proceed? (Y/N): Y
3247
+ True
3248
+ """
3249
+ self._logger.info(f"Deleting FeatureStore and all components from repository '{self.__repo}', force: {force}")
3250
+
3251
+ _args = []
3252
+ _args.append(["force", force, True, (bool)])
3253
+ # Validate argument types
3254
+ _Validators._validate_function_arguments(_args)
3255
+
3256
+ confirmation = input("The function removes Feature Store and drops the "
3257
+ "corresponding repo also. Are you sure you want to proceed? (Y/N): ")
3258
+
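+ # Proceed only on an explicit 'Y' or 'y'; any other input cancels the deletion.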
3259
+ if confirmation in ["Y", "y"]:
3260
+ return self.__drop_feature_store_objects(force=force)
3261
+
3262
+ self._logger.info(f"FeatureStore deletion cancelled by user")
3263
+ return False
3264
+
3265
+ def __drop_feature_store_objects(self, force=False):
3266
+ """
3267
+ DESCRIPTION:
3268
+ Removes the FeatureStore and its components from the repository.
3269
+
3270
+ PARAMETERS:
3271
3276
+ force:
3277
+ Optional Argument.
3278
+ Specifies whether to forcefully delete feature store or not.
3279
+ When set to True, the delete() method proceeds to drop objects
+ even if a previous step errors out. Otherwise, the delete() method
+ raises the exception at the first error and does not proceed to
+ remove other objects.
3283
+ Default Value: False
3284
+ Types: bool
3285
+
3286
+ RETURNS:
3287
+ bool
3288
+ """
3289
+ self._logger.debug(f"Starting to drop FeatureStore objects from repository '{self.__repo}', force: {force}")
3290
+
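+ # Drop order matters: triggers first (they block table drops), then views
+ # and dataset views, then the per-feature tables, then the metadata and
+ # staging tables, and finally the database itself.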
3291
+ # Drop all the tables and staging tables.
3292
+ self._logger.debug(f"Preparing to drop {len(self.__table_names)} table types from repository")
3293
+ tables_ = [
3294
+ self.__table_names["group_features"],
3295
+ self.__table_names["feature_group"],
3296
+ self.__table_names['feature'],
3297
+ self.__table_names['entity_xref'],
3298
+ self.__table_names['entity'],
3299
+ self.__table_names['data_source'],
3300
+ self.__table_names['feature_process'],
3301
+ self.__table_names['feature_runs'],
3302
+ self.__table_names['feature_metadata'],
3303
+ self.__table_names['dataset_catalog'],
3304
+ self.__table_names['dataset_features'],
3305
+ self.__table_names['data_domain'],
3306
+ self.__table_names['version']
3307
+ ]
3308
+
3309
+ tables_stg_ = [
3310
+ self.__table_names['feature_staging'],
3311
+ self.__table_names["entity_staging"],
3312
+ self.__table_names["entity_staging_xref"],
3313
+ self.__table_names["data_source_staging"],
3314
+ self.__table_names["feature_group_staging"],
3315
+ self.__table_names["group_features_staging"]
3316
+ ]
3317
+
3318
+ # Drop all the triggers first so that the tables can be dropped.
3319
+ self._logger.debug(f"Dropping {len(EFS_TRIGGERS)} triggers from repository '{self.__repo}'")
3320
+ ignr_errors = 'all' if force else None
3321
+ for trigger in EFS_TRIGGERS.values():
3322
+ self._logger.debug(f"Dropping trigger: {self.__repo}.{trigger}")
3323
+ execute_sql("drop trigger {}.{}".format(self.__repo, trigger),
3324
+ ignore_errors=ignr_errors)
3325
+
3326
+ # Drop the views first.
3327
+ self._logger.debug(f"Dropping views from repository '{self.__repo}'")
3328
+ views_ = [EFS_DB_COMPONENTS['feature_version']]
3329
+ for view in views_:
3330
+ self._logger.debug(f"Dropping view: {self.__repo}.{view}")
3331
+ db_drop_view(view, schema_name=self.__repo, suppress_error=force)
3332
+
3333
+ # Drop datasets.
3334
+ self._logger.debug(f"Dropping datasets from repository '{self.__repo}'")
3335
+ # Used EFS_DB_COMPONENTS['dataset_catalog'] because it contains all the datasets.
3336
+ # The get_df methods are filtered by data_domain, hence they don't show all datasets.
3337
+ for dataset in DataFrame(in_schema(self.__repo, EFS_DB_COMPONENTS['dataset_catalog'])).itertuples():
3338
+ self._logger.debug(f"Dropping dataset view: {dataset.name}")
3339
+ db_drop_view(dataset.name, schema_name=self.__repo, suppress_error=force)
3340
+
3341
+ # Drop all the Feature tables.
3342
+ self._logger.debug(f"Dropping feature tables from repository '{self.__repo}'")
3343
+ dropped_tab = set()
3344
+ # Used EFS_DB_COMPONENTS['feature_metadata'] because it contains all the features.
3345
+ # The get_df methods are filtered by data_domain, hence they don't show all features.
3346
+ for rec in DataFrame(in_schema(self.__repo, EFS_DB_COMPONENTS['feature_metadata'])).itertuples():
3347
+ # Avoid dropping the same table again.
3348
+ dropped_tab.add(rec.table_name)
3349
+
3350
+ for table in dropped_tab:
3351
+ self._logger.debug(f"Dropping feature table: {table}")
3352
+ db_drop_table(table, schema_name=self.__repo, suppress_error=force)
3353
+
3354
+ self._logger.debug(f"Dropping {len(tables_ + tables_stg_)} main and staging tables from repository '{self.__repo}'")
3355
+ for table in (tables_ + tables_stg_):
3356
+ self._logger.debug(f"Dropping table: {table}")
3357
+ db_drop_table(table, schema_name=self.__repo, suppress_error=force)
3358
+
3359
+ self._logger.debug(f"Dropping repository database: {self.__repo}")
3360
+ execute_sql(f"DROP DATABASE {self.__repo}")
3361
+
3362
+ self._logger.debug(f"Successfully completed dropping all FeatureStore objects from repository '{self.__repo}'")
3363
+ return True
3364
+
3365
+ def delete_feature(self, feature):
3366
+ """
3367
+ DESCRIPTION:
3368
+ Removes the archived Feature from repository.
3369
+
3370
+ PARAMETERS:
3371
+ feature:
3372
+ Required Argument.
3373
+ Specifies either the name of Feature or Object of Feature
3374
+ to remove from repository.
3375
+ Types: str OR Feature
3376
+
3377
+ RETURNS:
3378
+ bool.
3379
+
3380
+ RAISES:
3381
+ TeradataMLException, TypeError, ValueError
3382
+
3383
+ EXAMPLES:
3384
+ >>> from teradataml import DataFrame, Feature, FeatureStore
3385
+ # Create teradataml DataFrame.
3386
+ >>> load_example_data('dataframe', ['sales'])
3387
+ >>> df = DataFrame("sales")
3388
+
3389
+ # Create FeatureStore for repo 'vfs_v1'.
3390
+ >>> fs = FeatureStore("vfs_v1")
3391
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
3392
+ # Setup FeatureStore for this repository.
3393
+ >>> fs.setup()
3394
+ True
3395
+
3396
+ # Example 1: Delete the Feature 'sales_data_Feb' in the repo 'vfs_v1' using Feature object.
3397
+ # Create Feature for Column 'Feb'.
3398
+ >>> feature = Feature(name="sales_data_Feb", column=df.Feb)
3399
+ # Add the feature created above in the feature store.
3400
+ >>> fs.apply(feature)
3401
+ True
3402
+
3403
+ # List the available Features.
3404
+ >>> fs.list_features()
3405
+ id column_name description tags data_type feature_type status creation_time modified_time group_name
3406
+ name data_domain
3407
+ sales_data_Feb ALICE 1 Feb None None FLOAT CONTINUOUS ACTIVE 2025-07-28 04:49:55.827391 None None
3408
+
3409
+ # Let's first archive the Feature.
3410
+ >>> fs.archive_feature(feature=feature)
3411
+ Feature 'sales_data_Feb' is archived.
3412
+ True
3413
+
3414
+ # Delete Feature with name "sales_data_Feb".
3415
+ >>> fs.delete_feature(feature=feature)
3416
+ Feature 'sales_data_Feb' is deleted.
3417
+ True
3418
+
3419
+ # List the available Features after delete.
3420
+ >>> fs.list_features()
3421
+ Empty DataFrame
3422
+ Columns: [id, column_name, description, tags, data_type, feature_type, status, creation_time, modified_time, group_name]
3423
+ Index: []
3424
+
3425
+ # Example 2: Delete the Feature 'sales_data_Jan' in the repo 'vfs_v1' using the feature name.
3426
+ # Create Feature for Column 'Jan'.
3427
+ >>> feature2 = Feature(name="sales_data_Jan", column=df.Jan)
3428
+ # Add the feature created above in the feature store.
3429
+ >>> fs.apply(feature2)
3430
+ True
3431
+
3432
+ # List the available Features.
3433
+ >>> fs.list_features()
3434
+ id column_name description tags data_type feature_type status creation_time modified_time group_name
3435
+ name data_domain
3436
+ sales_data_Jan ALICE 2 Jan None None FLOAT CONTINUOUS ACTIVE 2025-07-28 04:50:55.827391 None None
3437
+
3438
+ # Let's first archive the Feature using feature name.
3439
+ >>> fs.archive_feature(feature="sales_data_Jan")
3440
+ Feature 'sales_data_Jan' is archived.
3441
+ True
3442
+
3443
+ # Delete Feature with name "sales_data_Jan".
3444
+ >>> fs.delete_feature(feature="sales_data_Jan")
3445
+ Feature 'sales_data_Jan' is deleted.
3446
+ True
3447
+ """
3448
+ self._logger.info(f"Deleting feature '{feature}' from repository '{self.__repo}', data_domain '{self.__data_domain}'")
3449
+ return self.__remove_obj(name=feature, type_="feature", action="delete")
3450
+
3451
+ def archive_entity(self, entity):
3452
+ """
3453
+ DESCRIPTION:
3454
+ Archives Entity from repository. Note that archived Entity
3455
+ is not available for any further processing. Archived Entity can be
3456
+ viewed using "list_entities(archived=True)" method.
3457
+
3458
+ PARAMETERS:
3459
+ entity:
3460
+ Required Argument.
3461
+ Specifies either the name of Entity or Object of Entity
3462
+ to remove from repository.
3463
+ Types: str OR Entity
3464
+
3465
+ RETURNS:
3466
+ bool.
3467
+
3468
+ RAISES:
3469
+ TeradataMLException, TypeError, ValueError
3470
+
3471
+ EXAMPLES:
3472
+ >>> from teradataml import DataFrame, Entity, FeatureStore
3473
+ # Create teradataml DataFrame.
3474
+ >>> load_example_data('dataframe', ['sales'])
3475
+ >>> df = DataFrame("sales")
3476
+
3477
+ # Create FeatureStore for repo 'vfs_v1'.
3478
+ >>> fs = FeatureStore("vfs_v1")
3479
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
3480
+ # Setup FeatureStore for this repository.
3481
+ >>> fs.setup()
3482
+ True
3483
+
3484
+ # Example 1: Archive the Entity 'sales_data' in the repo 'vfs_v1' using Entity name.
3485
+ # Create Entity using teradataml DataFrame Column.
3486
+ >>> entity = Entity(name="sales_data", columns=df.accounts)
3487
+ # Apply the entity to FeatureStore.
3488
+ >>> fs.apply(entity)
3489
+ True
3490
+
3491
+ # List all the available entities.
3492
+ >>> fs.list_entities()
3493
+ description creation_time modified_time entity_column
3494
+ name data_domain
3495
+ sales_data ALICE None 2025-07-28 04:54:34.687139 None accounts
3496
+
3497
+ # Archive Entity with name "sales_data".
3498
+ >>> fs.archive_entity(entity=entity.name)
3499
+ Entity 'sales_data' is archived.
3500
+ True
3501
+
3502
+ # List the entities after archive.
3503
+ >>> fs.list_entities(archived=True)
3504
+ name data_domain description creation_time modified_time archived_time entity_column
3505
+ 0 sales_data ALICE None 2025-07-28 04:54:34.687139 None 2025-07-28 04:55:46.750000 accounts
3506
+
3507
+ # Example 2: Archive the Entity 'sales_data_df' in the repo 'vfs_v1' using the Entity object.
3508
+ # Create Entity using teradataml DataFrame Column.
3509
+ >>> entity2 = Entity(name="sales_data_df", columns=df.accounts)
3510
+ # Apply the entity to FeatureStore.
3511
+ >>> fs.apply(entity2)
3512
+ True
3513
+
3514
+ # Archive Entity with Entity object.
3515
+ >>> fs.archive_entity(entity=entity2)
3516
+ Entity 'sales_data_df' is archived.
3517
+ True
3518
+
3519
+ # List the entities after archive.
3520
+ >>> fs.list_entities(archived=True)
3521
+ name data_domain description creation_time modified_time archived_time entity_column
3522
+ 0 sales_data ALICE None 2025-07-28 04:54:34.687139 None 2025-07-28 04:55:46.750000 accounts
3523
+ 1 sales_data_df ALICE None 2025-07-28 04:56:01.123456 None 2025-07-28 04:57:35.456789 accounts
3524
+
3525
+ """
3526
+ self._logger.info(f"Archiving entity '{entity}' from repository '{self.__repo}', data_domain '{self.__data_domain}'")
3527
+ return self.__remove_obj(name=entity, type_="entity")
3528
+
3529
+ def delete_entity(self, entity):
3530
+ """
3531
+ DESCRIPTION:
3532
+ Removes archived Entity from repository.
3533
+
3534
+ PARAMETERS:
3535
+ entity:
3536
+ Required Argument.
3537
+ Specifies either the name of Entity or Object of Entity
3538
+ to delete from repository.
3539
+ Types: str OR Entity
3540
+
3541
+ RETURNS:
3542
+ bool.
3543
+
3544
+ RAISES:
3545
+ TeradataMLException, TypeError, ValueError
3546
+
3547
+ EXAMPLES:
3548
+ >>> from teradataml import DataFrame, Entity, FeatureStore
3549
+ # Create teradataml DataFrame.
3550
+ >>> load_example_data('dataframe', ['sales'])
3551
+ >>> df = DataFrame("sales")
3552
+
3553
+ # Create FeatureStore for repo 'vfs_v1'.
3554
+ >>> fs = FeatureStore("vfs_v1")
3555
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
3556
+ # Setup FeatureStore for this repository.
3557
+ >>> fs.setup()
3558
+ True
3559
+
3560
+ # Example 1: Delete the Entity 'sales_data' in the repo 'vfs_v1' using Entity name.
3561
+ # Create Entity using teradataml DataFrame Column.
3562
+ >>> entity = Entity(name="sales_data", columns=df.accounts)
3563
+ # Apply the entity to FeatureStore.
3564
+ >>> fs.apply(entity)
3565
+ True
3566
+
3567
+ # List all the available entities.
3568
+ >>> fs.list_entities()
3569
+ description creation_time modified_time entity_column
3570
+ name data_domain
3571
+ sales_data ALICE None 2025-07-28 04:58:01.123456 None accounts
3572
+
3573
+ # Let's first archive the entity.
3574
+ >>> fs.archive_entity(entity=entity.name)
3575
+ Entity 'sales_data' is archived.
3576
+ True
3577
+
3578
+ # Delete Entity with name "sales_data".
3579
+ >>> fs.delete_entity(entity=entity.name)
3580
+ Entity 'sales_data' is deleted.
3581
+ True
3582
+
3583
+ # List the entities after delete.
3584
+ >>> fs.list_entities()
3585
+ Empty DataFrame
3586
+ Columns: [description, creation_time, modified_time, entity_column]
3587
+ Index: []
3588
+
3589
+ # Example 2: Delete the Entity 'sales_data_df' in the repo 'vfs_v1' using the Entity object.
3590
+ # Create Entity using teradataml DataFrame Column.
3591
+ >>> entity2 = Entity(name="sales_data_df", columns=df.accounts)
3592
+ # Apply the entity to FeatureStore.
3593
+ >>> fs.apply(entity2)
3594
+ True
3595
+
3596
+ # List all the available entities.
3597
+ >>> fs.list_entities()
3598
+ description creation_time modified_time entity_column
3599
+ name data_domain
3600
+ sales_data_df ALICE None 2025-07-28 04:59:14.325456 None accounts
3601
+
3602
+ # Let's first archive the entity.
3603
+ >>> fs.archive_entity(entity=entity2)
3604
+ Entity 'sales_data_df' is archived.
3605
+ True
3606
+
3607
+ # Delete Entity with Entity object.
3608
+ >>> fs.delete_entity(entity=entity2)
3609
+ Entity 'sales_data_df' is deleted.
3610
+ True
3611
+ """
3612
+ self._logger.info(f"Deleting entity '{entity}' from repository '{self.__repo}', data_domain '{self.__data_domain}'")
3613
+ return self.__remove_obj(name=entity, type_="entity", action="delete")
3614
+
3615
+ def __get_features_where_clause(self, features):
3616
+ """
3617
+ Internal function to prepare a where clause on features df.
3618
+ """
3619
+ col_expr = Col("name") == features[0]
3620
+ for feature in features[1:]:
3621
+ col_expr = ((col_expr) | (Col("name") == feature))
3622
+ col_expr = col_expr & (Col("data_domain") == self.__data_domain)
3623
+ return col_expr
3624
+
3625
+ def archive_feature_group(self, feature_group):
3626
+ """
3627
+ DESCRIPTION:
3628
+ Archives FeatureGroup from repository. Note that archived FeatureGroup
3629
+ is not available for any further processing. Archived FeatureGroup can be
3630
+ viewed using "list_feature_groups(archived=True)" method.
3631
+ Note:
3632
+ The function archives the associated Features, Entity and DataSource
3633
+ if they are not associated with any other FeatureGroups.
3634
+
3635
+ PARAMETERS:
3636
+ feature_group:
3637
+ Required Argument.
3638
+ Specifies either the name of FeatureGroup or Object of FeatureGroup
3639
+ to archive from repository.
3640
+ Types: str OR FeatureGroup
3641
+
3642
+ RETURNS:
3643
+ bool.
3644
+
3645
+ RAISES:
3646
+ TeradataMLException, TypeError, ValueError
3647
+
3648
+ EXAMPLES:
3649
+ >>> from teradataml import DataFrame, FeatureGroup, FeatureStore
3650
+ # Create teradataml DataFrame.
3651
+ >>> load_example_data('dataframe', ['sales'])
3652
+ >>> df = DataFrame("sales")
3653
+
3654
+ # Create FeatureStore for repo 'vfs_v1'.
3655
+ >>> fs = FeatureStore("vfs_v1", data_domain="d1")
3656
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
3657
+ # Setup FeatureStore for this repository.
3658
+ >>> fs.setup()
3659
+ True
3660
+
3661
+ # Example 1: Archive the FeatureGroup 'sales' in the repo 'vfs_v1' using FeatureGroup name.
3662
+ # Create FeatureGroup from teradataml DataFrame.
3663
+ >>> fg = FeatureGroup.from_DataFrame(name="sales", entity_columns="accounts", df=df, timestamp_column="datetime")
3664
+ # Apply FeatureGroup to FeatureStore.
3665
+ >>> fs.apply(fg)
3666
+ True
3667
+
3668
+ # List all the available FeatureGroups.
3669
+ >>> fs.list_feature_groups()
3670
+ description data_source_name entity_name creation_time modified_time
3671
+ name data_domain
3672
+ sales d1 None sales sales 2025-07-28 05:00:19.780453 None
3673
+
3674
+ # Archive FeatureGroup with name "sales".
3675
+ >>> fs.archive_feature_group(feature_group='sales')
3676
+ FeatureGroup 'sales' is archived.
3677
+ True
3678
+
3679
+ # List all the available FeatureGroups after archive.
3680
+ >>> fs.list_feature_groups(archived=True)
3681
+ name data_domain description data_source_name entity_name creation_time modified_time archived_time
3682
+ 0 sales d1 None sales sales 2025-07-28 05:00:19.780453 None 2025-07-28 05:02:04.100000
3683
+
3684
+ # Example 2: Archive the FeatureGroup 'sales_df' in the repo 'vfs_v1' using the FeatureGroup object.
3685
+ # Create FeatureGroup from teradataml DataFrame.
3686
+ >>> fg2 = FeatureGroup.from_DataFrame(name="sales_df", entity_columns="accounts", df=df, timestamp_column="datetime")
3687
+ # Apply FeatureGroup to FeatureStore.
3688
+ >>> fs.apply(fg2)
3689
+ True
3690
+
3691
+ # Archive FeatureGroup with FeatureGroup object.
3692
+ >>> fs.archive_feature_group(feature_group=fg2)
3693
+ FeatureGroup 'sales_df' is archived.
3694
+ True
3695
+
3696
+ # List all the available FeatureGroups after archive.
3697
+ >>> fs.list_feature_groups(archived=True)
3698
+ name data_domain description data_source_name entity_name creation_time modified_time archived_time
3699
+ 0 sales d1 None sales sales 2025-07-28 05:00:19.780453 None 2025-07-28 05:02:04.100000
3700
+ 1 sales_df d1 None sales sales 2025-07-28 05:02:01.123456 None 2025-07-28 05:03:35.456789
3701
+ """
3702
+ self._logger.info(f"Archiving feature group '{feature_group}' from repository '{self.__repo}', data_domain '{self.__data_domain}'")
3703
+
3704
+ argument_validation_params = []
3705
+ argument_validation_params.append(["feature_group", feature_group, False, (str, FeatureGroup), True])
3706
+
3707
+ # Validate argument types
3708
+ _Validators._validate_function_arguments(argument_validation_params)
3709
+
3710
+ feature_group_name = feature_group if isinstance(feature_group, str) else feature_group.name
3711
+
3712
+ stg_table = _FeatureStoreDFContainer.get_df("feature_group_staging", self.__repo, self.__data_domain)
3713
+ stg_table = stg_table[stg_table.name == feature_group_name]
3714
+ if stg_table.shape[0] > 0:
3715
+ print("{} '{}' is already archived.".format('FeatureGroup', feature_group_name))
3716
+ return False
3717
+
3718
+ # Check if FeatureGroup is related to any FeatureProcess
3719
+ feature_process_df = self.list_feature_processes()
3720
+ related_processes = feature_process_df[(feature_process_df['data_source'] == feature_group_name)]
3721
+
3722
+ if related_processes.shape[0] > 0:
3723
+ process_ids = [fp.process_id for fp in related_processes.itertuples()]
3724
+ related_process_ids = "feature process(es) {}".format(process_ids)
3725
+ err_code = MessageCodes.EFS_OBJ_IN_FEATURE_PROCESS
3726
+ err_msg = Messages.get_message(err_code,
3727
+ 'FeatureGroup',
3728
+ feature_group_name,
3729
+ related_process_ids,
3730
+ "feature process(es)",
3731
+ "FeatureStore.archive_feature_process() and FeatureStore.delete_feature_process()",
3732
+ )
3733
+ raise TeradataMlException(err_msg, err_code)
3734
+
3735
+ fg = self.get_feature_group(feature_group_name) if isinstance(feature_group, str) else feature_group
3736
+
3737
+ fg_df = self.list_feature_groups()
3738
+
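+ # Hypothetical illustration: if feature 'Jan' belongs to both groups
+ # 'sales' and 'sales_df', archiving 'sales' keeps 'Jan' because it is
+ # still referenced by 'sales_df'; only unshared objects are archived.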
3739
+ # Find out shared Features. Extract the features which are mapped to
3740
+ # other groups. They can not be deleted.
3741
+ feature_names = [f.name for f in fg.features]
3742
+ features_df = self.list_features()
3743
+ col_expr = self.__get_features_where_clause(feature_names)
3744
+ features_df = features_df[((features_df.group_name != fg.name) & (col_expr))]
3745
+ shared_features = [f.name for f in features_df.drop_duplicate('name').itertuples()]
3746
+ feature_names_to_remove = [f for f in feature_names if f not in shared_features]
3747
+
3748
+ # Find out shared Entities. If entity is not shared, then update 'entity_name'
3749
+ # to update value.
3750
+ entity_name = None
3751
+ ent = fg_df[((fg_df.entity_name == fg.entity.name) & (fg_df.name != fg.name))]
3752
+ recs = ent.shape[0]
3753
+ if recs == 0:
3754
+ entity_name = fg.entity.name
3755
+
3756
+ # Find out shared DataSources. If datasource is not shared, then update 'data_source_name'.
3757
+ data_source_name = None
3758
+ ds_df = fg_df[((fg_df.data_source_name == fg.data_source.name) & (fg_df.name != fg.name))]
3759
+ recs = ds_df.shape[0]
3760
+ if recs == 0:
3761
+ data_source_name = fg.data_source.name
3762
+
3763
+ res = self._archive_feature_group(fg.name, feature_names_to_remove, entity_name, data_source_name)
3764
+
3765
+ if res == 1:
3766
+ print("FeatureGroup '{}' is archived.".format(feature_group_name))
3767
+ return True
3768
+
3769
+ print("FeatureGroup '{}' not exist to archive.".format(feature_group_name))
3770
+ return False
3771
+
3772
+ @db_transaction
3773
+ def _archive_feature_group(self, group_name, feature_names, entity_name, data_source_name):
3774
+ """
3775
+ DESCRIPTION:
3776
+ Internal method to archive FeatureGroup from repository.
3777
+
3778
+ PARAMETERS:
3779
+ group_name:
3780
+ Required Argument.
3781
+ Specifies the name of FeatureGroup to archive from repository.
3782
+ Types: str
3783
+
3784
+ feature_names:
3785
+ Required Argument.
3786
+ Specifies the name of Features to archive from repository.
3787
+ Types: list
3788
+
3789
+ entity_name:
3790
+ Required Argument.
3791
+ Specifies the name of Entity to archive from repository.
3792
+ Types: str
3793
+
3794
+ data_source_name:
3795
+ Required Argument.
3796
+ Specifies the name of DataSource to archive from repository.
3797
+ Types: str
3798
+
3799
+ RETURNS:
3800
+ bool.
3801
+
3802
+ RAISES:
3803
+ OperationalError
3804
+
3805
+ EXAMPLES:
3806
+ >>> self._archive_feature_group("group1", ["feature1"], "entity_name", None)
3807
+ """
3808
+ # Remove data for FeatureGroup from Xref table.
3809
+ # This allows removing data from the other tables.
3810
+ res = _delete_data(schema_name=self.__repo,
3811
+ table_name=self.__table_names["group_features"],
3812
+ delete_conditions=(Col("group_name") == group_name) &
3813
+ (Col("group_data_domain") == self.__data_domain)
3814
+ )
3815
+
3816
+ # Remove FeatureGroup.
3817
+ res = _delete_data(schema_name=self.__repo,
3818
+ table_name=self.__table_names["feature_group"],
3819
+ delete_conditions=(Col("name") == group_name) &
3820
+ (Col("data_domain") == self.__data_domain)
3821
+ )
3822
+
3823
+ # Remove Features.
3824
+ if feature_names:
3825
+ _delete_data(schema_name=self.__repo,
3826
+ table_name=self.__table_names["feature"],
3827
+ delete_conditions=self.__get_features_where_clause(feature_names)
3828
+ )
3829
+
3830
+ # Remove entities.
3831
+ if entity_name:
3832
+ _delete_data(schema_name=self.__repo,
3833
+ table_name=self.__table_names["entity_xref"],
3834
+ delete_conditions=(Col("entity_name") == entity_name) &
3835
+ (Col("data_domain") == self.__data_domain)
3836
+ )
3837
+
3838
+ _delete_data(schema_name=self.__repo,
3839
+ table_name=self.__table_names["entity"],
3840
+ delete_conditions=(Col("name") == entity_name) &
3841
+ (Col("data_domain") == self.__data_domain)
3842
+ )
3843
+
3844
+ # Remove DataSource.
3845
+ if data_source_name:
3846
+ _delete_data(schema_name=self.__repo,
3847
+ table_name=self.__table_names["data_source"],
3848
+ delete_conditions=(Col("name") == data_source_name) &
3849
+ (Col("data_domain") == self.__data_domain)
3850
+ )
3851
+
3852
+ return res
3853
+
3854
+ @db_transaction
3855
+ def delete_feature_group(self, feature_group):
3856
+ """
3857
+ DESCRIPTION:
3858
+ Removes archived FeatureGroup from repository.
3859
+ Note:
3860
+ Unlike 'archive_feature_group()', this function does not delete the
3861
+ associated Features, Entity and DataSource. One should delete those
3862
+ using 'delete_feature()', 'delete_entity()' and 'delete_data_source()'.
3863
+
3864
+ PARAMETERS:
3865
+ feature_group:
3866
+ Required Argument.
3867
+ Specifies either the name of FeatureGroup or Object of FeatureGroup
3868
+ to delete from repository.
3869
+ Types: str OR FeatureGroup
3870
+
3871
+ RETURNS:
3872
+ bool
3873
+
3874
+ RAISES:
3875
+ TeradataMLException, TypeError, ValueError
3876
+
3877
+ EXAMPLES:
3878
+ >>> from teradataml import DataFrame, FeatureGroup, FeatureStore
3879
+ # Create teradataml DataFrame.
3880
+ >>> load_example_data('dataframe', ['sales'])
3881
+ >>> df = DataFrame("sales")
3882
+
3883
+ # Create FeatureStore for repo 'vfs_v1'.
3884
+ >>> fs = FeatureStore("vfs_v1", data_domain="d1")
3885
+ Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
3886
+ # Setup FeatureStore for this repository.
3887
+ >>> fs.setup()
3888
+ True
3889
+
3890
+ # Example 1: Delete the FeatureGroup 'sales' in the repo 'vfs_v1' using FeatureGroup name.
3891
+ # Create FeatureGroup from teradataml DataFrame.
3892
+ >>> fg = FeatureGroup.from_DataFrame(name="sales", entity_columns="accounts", df=df, timestamp_column="datetime")
3893
+ # Apply FeatureGroup to FeatureStore.
3894
+ >>> fs.apply(fg)
3895
+ True
3896
+
3897
+ # List all the available FeatureGroups.
3898
+ >>> fs.list_feature_groups()
3899
+ description data_source_name entity_name creation_time modified_time
3900
+ name data_domain
3901
+ sales d1 None sales sales 2025-07-28 05:00:19.780453 None
3902
+
3903
+ # Archive FeatureGroup with name "sales".
3904
+ >>> fs.archive_feature_group(feature_group='sales')
3905
+ FeatureGroup 'sales' is archived.
3906
+ True
3907
+
3908
+ # Delete FeatureGroup with name "sales".
3909
+ >>> fs.delete_feature_group(feature_group='sales')
3910
+ FeatureGroup 'sales' is deleted.
3911
+ True
3912
+
3913
+ # List all the available FeatureGroups after delete.
3914
+ >>> fs.list_feature_groups()
3915
+ Empty DataFrame
3916
+ Columns: [description, data_source_name, entity_name, creation_time, modified_time]
3917
+ Index: []
3918
+
3919
+ # Example 2: Delete the FeatureGroup 'sales' in the repo 'vfs_v1' using the FeatureGroup object.
3920
+ # Create FeatureGroup from teradataml DataFrame.
3921
+ >>> fg2 = FeatureGroup.from_DataFrame(name="sales", entity_columns="accounts", df=df, timestamp_column="datetime")
3922
+ # Apply FeatureGroup to FeatureStore.
3923
+ >>> fs.apply(fg2)
3924
+ True
3925
+
3926
+ # Archive FeatureGroup with FeatureGroup object.
3927
+ >>> fs.archive_feature_group(feature_group=fg2)
3928
+ FeatureGroup 'sales' is archived.
3929
+ True
3930
+
3931
+ # Delete FeatureGroup with FeatureGroup object.
3932
+ >>> fs.delete_feature_group(feature_group=fg2)
3933
+ FeatureGroup 'sales' is deleted.
3934
+ True
3935
+ """
3936
+ self._logger.info(f"Deleting feature group '{feature_group}' from repository '{self.__repo}', data_domain '{self.__data_domain}'")
3937
+
3938
+ argument_validation_params = []
3939
+ argument_validation_params.append(["feature_group", feature_group, False, (str, FeatureGroup), True])
3940
+
3941
+ # Validate argument types
3942
+ _Validators._validate_function_arguments(argument_validation_params)
3943
+
3944
+ fg_name = feature_group if isinstance(feature_group, str) else feature_group.name
3945
+
3946
+ # Validation for delete action - ensure FeatureGroup is already archived
3947
+ main_fg_df = self.__get_feature_group_df()
3948
+ existing_records = main_fg_df[main_fg_df["name"] == fg_name]
3949
+
3950
+ if existing_records.shape[0] > 0:
3951
+ error_code = MessageCodes.EFS_DELETE_BEFORE_ARCHIVE
3952
+ error_msg = Messages.get_message(
3953
+ error_code,
3954
+ 'FeatureGroup',
3955
+ fg_name,
3956
+ 'feature_group')
3957
+ raise TeradataMlException(error_msg, error_code)
3958
+
3959
+ # Remove data for FeatureGroup.
3960
+ _delete_data(table_name=self.__table_names["group_features_staging"],
3961
+ schema_name=self.__repo,
3962
+ delete_conditions=(Col("group_name") == fg_name) &
3963
+ (Col("group_data_domain") == self.__data_domain)
3964
+ )
3965
+
3966
+ res = _delete_data(table_name=self.__table_names["feature_group_staging"],
3967
+ schema_name=self.__repo,
3968
+ delete_conditions=(Col("name") == fg_name) &
3969
+ (Col("data_domain") == self.__data_domain)
3970
+ )
3971
+
3972
+ if res == 1:
3973
+ print("FeatureGroup '{}' is deleted.".format(fg_name))
3974
+ return True
3975
+
3976
+ print("FeatureGroup '{}' does not exist to delete.".format(fg_name))
3977
+ return False
3978
+
3979
+ @property
3980
+ def version(self):
3981
+ """
3982
+ DESCRIPTION:
3983
+ Get the FeatureStore version.
3984
+
3985
+ PARAMETERS:
3986
+ None
3987
+
3988
+ RETURNS:
3989
+ str
3990
+
3991
+ RAISES:
3992
+ None
3993
+
3994
+ EXAMPLES:
3995
+ # Example 1: Get the FeatureStore version for
3996
+ # the repo 'vfs_v1'.
3997
+ >>> from teradataml import FeatureStore
3998
+ >>> fs = FeatureStore('vfs_v1')
3999
+ FeatureStore is ready to use.
4000
+
4001
+ # Get the version of FeatureStore.
4002
+ >>> fs.version
4003
+ '2.0.0'
4004
+ """
4005
+ self._logger.debug(f"Accessing version property for repository '{self.__repo}'")
4006
+ if self.__version is None:
4007
+ self.__version = self.__get_version()
4008
+ self._logger.debug(f"Retrieved FeatureStore version: {self.__version}")
4009
+ return self.__version
4010
+
+    def list_feature_catalogs(self) -> DataFrame:
+        """
+        DESCRIPTION:
+            Lists all the feature catalogs.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            teradataml DataFrame
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            # Example 1: List all the feature catalogs in the repo 'vfs_v1'.
+            >>> from teradataml import FeatureStore
+
+            # Create FeatureStore for the repo 'vfs_v1' or use existing one.
+            >>> fs = FeatureStore("vfs_v1")
+            FeatureStore is ready to use.
+
+            # Load the sales data.
+            >>> load_example_data("dataframe", "sales")
+            >>> df = DataFrame("sales")
+
+            # Create a feature process.
+            >>> from teradataml import FeatureProcess
+            >>> fp = FeatureProcess(repo="vfs_v1",
+            ...                     data_domain='sales',
+            ...                     object=df,
+            ...                     entity="accounts",
+            ...                     features=["Jan", "Feb", "Mar", "Apr"])
+            >>> fp.run()
+            Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' started.
+            Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' completed.
+
+            # List all the feature catalogs in the repo 'vfs_v1'.
+            >>> fs.list_feature_catalogs()
+                        data_domain  feature_id                                 table_name                     valid_start                       valid_end
+            entity_name
+            accounts          sales           2  FS_T_918e1cb4_c6bc_6d38_634d_7b9fe53e2a63  2025-06-16 16:02:49.481245+00:  9999-12-31 23:59:59.999999+00:
+            accounts          sales      100001  FS_T_e84ff803_3d5c_4793_cd72_251c780fffe4  2025-06-16 16:02:49.481245+00:  9999-12-31 23:59:59.999999+00:
+            accounts          sales           1  FS_T_918e1cb4_c6bc_6d38_634d_7b9fe53e2a63  2025-06-16 16:02:49.481245+00:  9999-12-31 23:59:59.999999+00:
+            accounts          sales      200001  FS_T_918e1cb4_c6bc_6d38_634d_7b9fe53e2a63  2025-06-16 16:02:49.481245+00:  9999-12-31 23:59:59.999999+00:
+        """
+        self._logger.info(f"Listing feature catalogs from repository '{self.__repo}', data_domain '{self.__data_domain}'")
+        df = self.__get_without_valid_period_df(self.__get_features_metadata_df())
+        self._logger.debug(f"Retrieved feature catalogs:\n{df}")
+        return df
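Because list_feature_catalogs() hands back a teradataml DataFrame rather than plain text, the catalog can be narrowed with ordinary DataFrame filters. A small usage sketch, assuming a connected FeatureStore fs and the column names shown in the sample output:

    # Sketch: restrict the catalog to the rows backed by one physical table.
    catalogs = fs.list_feature_catalogs()
    one_table = catalogs[catalogs.table_name == "FS_T_918e1cb4_c6bc_6d38_634d_7b9fe53e2a63"]
    print(one_table)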
+
+    def archive_feature_process(self, process_id):
+        """
+        DESCRIPTION:
+            Archives the FeatureProcess with the given process_id.
+            Notes:
+                * Archived FeatureProcess is not available for any further processing.
+                * Archived FeatureProcess can be viewed using the
+                  `FeatureStore.list_feature_processes(archived=True)` method.
+                * The same feature can be ingested by multiple processes. If a feature
+                  associated with process "process_id" is also associated with other
+                  processes, then this function only archives the feature values
+                  associated with the process "process_id". Otherwise, it archives the
+                  feature from the feature catalog. Look at
+                  `FeatureCatalog.archive_features()` for more details.
+
+        PARAMETERS:
+            process_id:
+                Required Argument.
+                Specifies the ID of the FeatureProcess to archive from repository.
+                Types: str
+
+        RETURNS:
+            bool
+
+        RAISES:
+            TeradataMlException, TypeError, ValueError
+
+        EXAMPLES:
+            >>> load_example_data('dataframe', ['sales'])
+            # Create a teradataml DataFrame.
+            >>> from teradataml import DataFrame, FeatureProcess, FeatureStore
+            >>> df = DataFrame("sales")
+
+            # Create FeatureStore for repo 'repo'.
+            >>> fs = FeatureStore("repo", data_domain='sales')
+            Repo repo does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
+            # Run FeatureProcess to ingest features.
+            >>> from teradataml import FeatureProcess
+            >>> fp = FeatureProcess(repo='repo',
+            ...                     data_domain='sales',
+            ...                     object=df,
+            ...                     entity='accounts',
+            ...                     features=['Jan', 'Feb', 'Mar', 'Apr'])
+            >>> fp.run()
+            Process '2a014f2d-6b71-11f0-aeda-f020ffe7fe09' started.
+            Process '2a014f2d-6b71-11f0-aeda-f020ffe7fe09' completed.
+
+            # List the available FeatureProcesses.
+            >>> fs.list_feature_processes()
+                                                 description data_domain process_type data_source entity_id feature_names feature_ids valid_start valid_end
+            process_id
+            2a014f2d-6b71-11f0-aeda-f020ffe7fe09 sales denormalized view "sales" accounts Apr, Feb, Jan, Mar None 2025-07-28 05:10:34.760000+00: 9999-12-31 23:59:59.999999+00:
+
+            # Example: Archive the FeatureProcess with process_id '2a014f2d-6b71-11f0-aeda-f020ffe7fe09'.
+            >>> fs.archive_feature_process("2a014f2d-6b71-11f0-aeda-f020ffe7fe09")
+            Feature 'Jan' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
+            Feature 'Jan' is archived from metadata.
+            Feature 'Feb' is archived from table 'FS_T_6003dc24_375e_7fd6_46f0_eeb868305c4a'.
+            Feature 'Feb' is archived from metadata.
+            Feature 'Mar' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
+            Feature 'Mar' is archived from metadata.
+            Feature 'Apr' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
+            Feature 'Apr' is archived from metadata.
+            FeatureProcess with process id '2a014f2d-6b71-11f0-aeda-f020ffe7fe09' is archived.
+            True
+        """
+        argument_validation_params = []
+        argument_validation_params.append(["process_id", process_id, True, str, True])
+
+        # Validate argument types.
+        _Validators._validate_function_arguments(argument_validation_params)
+
+        features = self.__validate_feature_process(process_id)
+        if features is False:
+            return False
+
+        feature_details = FeatureCatalog._get_feature_details(
+            self.__repo, self.__data_domain, features)
+
+        # Get the shared features.
+        shared_features = FeatureCatalog._get_shared_features(self.__repo, self.__data_domain)
+
+        # Remove the features from the feature metadata table.
+        return self.__remove_feature_process(
+            process_id, features, feature_details, shared_features)
+
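As the notes above state, an archived process drops out of the default listing but stays visible when archived=True is passed. A short verification sketch, assuming fs and the process id from the example:

    # Sketch: archive a process, then confirm where it is (and is not) listed.
    if fs.archive_feature_process("2a014f2d-6b71-11f0-aeda-f020ffe7fe09"):
        print(fs.list_feature_processes())                # active processes only
        print(fs.list_feature_processes(archived=True))   # includes the archived one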
+    def delete_feature_process(self, process_id):
+        """
+        DESCRIPTION:
+            Deletes the archived feature process from feature store with the given process_id.
+            Notes:
+                * One feature can be ingested by multiple processes. If a feature
+                  associated with process "process_id" is also ingested by other
+                  processes, then "delete_feature_process()" only deletes the feature
+                  values associated with the process "process_id". Otherwise, it deletes
+                  the feature from the feature catalog. Look at
+                  'FeatureCatalog.delete_features()' for more details.
+
+        PARAMETERS:
+            process_id:
+                Required Argument.
+                Specifies the ID of the FeatureProcess to delete from repository.
+                Types: str
+
+        RETURNS:
+            bool
+
+        RAISES:
+            TeradataMlException, TypeError, ValueError
+
+        EXAMPLES:
+            >>> load_example_data('dataframe', ['sales'])
+            # Create a teradataml DataFrame.
+            >>> from teradataml import DataFrame, FeatureProcess, FeatureStore
+            >>> df = DataFrame("sales")
+
+            # Create FeatureStore for repo 'repo'.
+            >>> fs = FeatureStore("repo", data_domain='sales')
+            Repo repo does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
+            # Run FeatureProcess to ingest features.
+            >>> from teradataml import FeatureProcess
+            >>> fp = FeatureProcess(repo='repo',
+            ...                     data_domain='sales',
+            ...                     object=df,
+            ...                     entity='accounts',
+            ...                     features=['Jan', 'Feb', 'Mar', 'Apr'])
+            >>> fp.run()
+            Process '2a014f2d-6b71-11f0-aeda-f020ffe7fe09' started.
+            Process '2a014f2d-6b71-11f0-aeda-f020ffe7fe09' completed.
+
+            # List the available FeatureProcesses.
+            >>> fs.list_feature_processes()
+                                                 description data_domain process_type data_source entity_id feature_names feature_ids valid_start valid_end
+            process_id
+            2a014f2d-6b71-11f0-aeda-f020ffe7fe09 sales denormalized view "sales" accounts Apr, Feb, Jan, Mar None 2025-07-28 05:10:34.760000+00: 9999-12-31 23:59:59.999999+00:
+
+            # Example: Archive the FeatureProcess with process_id '2a014f2d-6b71-11f0-aeda-f020ffe7fe09'.
+            >>> fs.archive_feature_process("2a014f2d-6b71-11f0-aeda-f020ffe7fe09")
+            Feature 'Jan' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
+            Feature 'Jan' is archived from metadata.
+            Feature 'Feb' is archived from table 'FS_T_6003dc24_375e_7fd6_46f0_eeb868305c4a'.
+            Feature 'Feb' is archived from metadata.
+            Feature 'Mar' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
+            Feature 'Mar' is archived from metadata.
+            Feature 'Apr' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
+            Feature 'Apr' is archived from metadata.
+            FeatureProcess with process id '2a014f2d-6b71-11f0-aeda-f020ffe7fe09' is archived.
+            True
+
+            # Example: Delete the FeatureProcess with process_id '2a014f2d-6b71-11f0-aeda-f020ffe7fe09'.
+            >>> fs.delete_feature_process('2a014f2d-6b71-11f0-aeda-f020ffe7fe09')
+            Feature 'Feb' deleted successfully from table 'FS_T_e84ff803_3d5c_4793_cd72_251c780fffe4'.
+            Feature 'Jan' deleted successfully from table 'FS_T_918e1cb4_c6bc_6d38_634d_7b9fe53e2a63'.
+            Feature 'Mar' deleted successfully from table 'FS_T_918e1cb4_c6bc_6d38_634d_7b9fe53e2a63'.
+            Feature 'Apr' deleted successfully from table 'FS_T_918e1cb4_c6bc_6d38_634d_7b9fe53e2a63'.
+            FeatureProcess with process_id '2a014f2d-6b71-11f0-aeda-f020ffe7fe09' is deleted.
+            True
+
+            # List the available FeatureProcesses after delete.
+            >>> fs.list_feature_processes()
+            Empty DataFrame
+            Columns: [description, data_domain, process_type, data_source, entity_id, feature_names, feature_ids, valid_start, valid_end]
+            Index: []
+        """
+        argument_validation_params = []
+        argument_validation_params.append(["process_id", process_id, True, str, True])
+
+        # Validate argument types.
+        _Validators._validate_function_arguments(argument_validation_params)
+
+        # Before deleting, check whether the specified process id exists.
+        features = self.__validate_feature_process(process_id, type_='delete')
+        if features is False:
+            return False
+
+        feature_details = FeatureCatalog._get_feature_details(
+            self.__repo, self.__data_domain, features)
+
+        # Get the shared features.
+        shared_features = FeatureCatalog._get_shared_features(self.__repo, self.__data_domain)
+
+        return self.__remove_feature_process(
+            process_id, features, feature_details, shared_features, type_='delete')
+
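Since delete_feature_process() refuses to act on a process that has not been archived, full removal is a two-step call chain with both boolean results checked. A minimal sketch of such a helper (purge_feature_process is a name invented here for illustration):

    # Sketch: archive and then delete a feature process in one call.
    def purge_feature_process(fs, process_id):
        # archive_feature_process returns False when the process is missing,
        # already archived, or its features are still used by a dataset.
        if not fs.archive_feature_process(process_id):
            return False
        return fs.delete_feature_process(process_id)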
+    @db_transaction
+    def __remove_feature_process(self,
+                                 process_id,
+                                 process_features,
+                                 feature_details,
+                                 shared_features,
+                                 type_='archive'):
+        """
+        DESCRIPTION:
+            Internal function to remove the FeatureProcess from repository.
+            It also removes the associated features from the feature table.
+
+        PARAMETERS:
+            process_id:
+                Required Argument.
+                Specifies the ID of the FeatureProcess to remove from repository.
+                Types: str
+
+            process_features:
+                Required Argument.
+                Specifies the names of the features ingested by the process.
+                Types: list of str
+
+            feature_details:
+                Required Argument.
+                Specifies the details of the features to remove from repository.
+                Types: list of namedtuple
+
+            shared_features:
+                Required Argument.
+                Specifies the features which are also ingested by other processes.
+                Types: list
+
+            type_:
+                Optional Argument.
+                Specifies the type of removal. Allowed values are 'archive' and 'delete'.
+                Default value is 'archive'.
+                Types: str
+
+        RETURNS:
+            bool
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            >>> self.__remove_feature_process("5747082b-4acb-11f0-a2d7-f020ffe7fe09",
+            ...     process_features=[namedtuple('feature_', ['name', 'id', 'table_name'])('sales_data_Feb', 1, 'FS_T_12345')],
+            ...     type_='archive')
+        """
+        self._logger.info(f"Removing feature process '{process_id}' from repository '{self.__repo}', action: {type_}")
+
+        temporal_clause = 'CURRENT VALIDTIME'
+        delete_condition = (Col("process_id") == process_id)
+        if type_ == 'delete':
+            temporal_clause = None
+
+        self._logger.debug(f"Removing {len(process_features)} features from feature catalog for process '{process_id}'")
+        fc = FeatureCatalog(self.__repo, self.__data_domain)
+        res1 = fc._remove_features(process_features, feature_details, type_ == 'archive', shared_features, process_id)
+
+        # Remove it from feature process table.
+        self._logger.debug(f"Removing process '{process_id}' from feature_process table")
+        res = _delete_data(table_name=self.__table_names["feature_process"],
+                           schema_name=self.__repo,
+                           delete_conditions=delete_condition,
+                           temporal_clause=temporal_clause
+                           )
+
+        if res >= 1:
+            print("FeatureProcess with process id '{}' is {}d.".format(process_id, type_))
+            return res1 & True
+
+        print("FeatureProcess with process id '{}' does not exist to {}.".format(process_id, type_))
+        return res1 & False
+
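The type_ switch above maps onto Teradata temporal SQL: an archive runs the DELETE under CURRENT VALIDTIME, which merely closes each row's validity period, while a delete omits the temporal clause and removes the rows outright. A sketch of the two statement shapes; the table name and predicate value are placeholders, not the store's real identifiers:

    # Sketch: the DELETE shapes behind type_='archive' vs. type_='delete'.
    archive_sql = ('CURRENT VALIDTIME '
                   'DELETE FROM "repo"."FS_FEATURE_PROCESS" '
                   "WHERE process_id = 'some-process-id'")   # closes the valid period

    delete_sql = ('DELETE FROM "repo"."FS_FEATURE_PROCESS" '
                  "WHERE process_id = 'some-process-id'")    # removes rows physically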
+    def __validate_feature_process(self, process_id, type_='archive'):
+        """
+        DESCRIPTION:
+            Internal function to validate whether the feature process exists.
+            The function also checks whether the process is archived or not.
+
+        PARAMETERS:
+            process_id:
+                Required Argument.
+                Specifies the ID of the FeatureProcess to validate.
+                Types: str
+
+            type_:
+                Optional Argument.
+                Specifies the type of validation. Allowed values are 'archive' and 'delete'.
+                Default value is 'archive'.
+                Types: str
+
+        RETURNS:
+            set or bool.
+            False if the process does not exist, is already archived (for 'archive'),
+            or is not yet archived (for 'delete').
+            Set of feature names if all validations pass.
+
+        RAISES:
+            TeradataMlException
+
+        EXAMPLES:
+            >>> # Validate the feature process with process_id '5747082b-4acb-11f0-a2d7-f020ffe7fe09'.
+            >>> fs.__validate_feature_process(process_id='5747082b-4acb-11f0-a2d7-f020ffe7fe09')
+            {'sales_data_Feb', 'sales_data_Jan', 'sales_data_Mar', 'sales_data_Apr'}
+        """
+        # Extract process type, data source, entity_id, feature_names from given process id.
+        sql = EFS_ARCHIVED_RECORDS.format("feature_names",
+                                          '"{}"."{}"'.format(self.__repo,
+                                                             self.__table_names["feature_process"]),
+                                          "PROCESS_ID = '{}' AND DATA_DOMAIN = '{}'".
+                                          format(process_id, self.__data_domain))
+
+        feature_names = set()
+        all_archived = True
+        any_one_not_archived = False
+        for rec in execute_sql(sql):
+            is_archived = rec[1] == 1
+            all_archived = all_archived and is_archived
+            any_one_not_archived = any_one_not_archived or (not is_archived)
+            feature_names.update([f.strip() for f in rec[0].split(",")])
+
+        # Not raising error to align with the behavior of other methods.
+        if not feature_names:
+            print("FeatureProcess with process id '{}' does not exist.".format(process_id))
+            return False
+
+        # Check if the features are already archived or not.
+        if type_ == 'archive' and all_archived:
+            # In that case, every record's valid end date is earlier than the current timestamp.
+            print("FeatureProcess with process id '{}' is already archived.".format(process_id))
+            return False
+
+        # For delete, check if the process is archived or not first.
+        if type_ == 'delete' and any_one_not_archived:
+            print("FeatureProcess with process id '{}' is not archived. "
+                  "First archive the process and then delete it.".format(process_id))
+            return False
+
+        # Check if the features are associated with any dataset or not.
+        dataset_features_df = self.__get_dataset_features_df()
+        # Validate the feature names.
+        _Validators._validate_features_not_in_efs_dataset(
+            df=dataset_features_df[(dataset_features_df['data_domain'] == self.__data_domain)],
+            feature_names=list(feature_names),
+            action='archived')
+
+        return feature_names
+
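The archive and delete preconditions reduce to two folds over the per-record archived flags. Distilled into a pure function for clarity (records are (feature_names, is_archived_int) rows as returned by the SQL above, assumed non-empty since the caller bails out earlier when nothing matches):

    # Sketch: the all-archived / any-not-archived test used by the validator.
    def archived_state(records):
        flags = [rec[1] == 1 for rec in records]
        return all(flags), any(not f for f in flags)

    # archive is refused when the first value is True (already archived);
    # delete is refused when the second value is True (not fully archived).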
+    def remove_data_domain(self):
+        """
+        DESCRIPTION:
+            Removes the data domain from the FeatureStore and all associated objects.
+
+            Notes:
+                * This operation permanently deletes all objects, tables, and views tied to the data domain.
+                * There is no archival or built-in recovery; all deletions are irreversible.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            bool
+
+        RAISES:
+            TeradataMlException
+
+        EXAMPLES:
+            >>> from teradataml import FeatureStore
+            # Create a new FeatureStore or use an existing one.
+            >>> fs = FeatureStore("repo", data_domain="sales")
+            FeatureStore is ready to use.
+
+            # Remove the data domain 'sales' and all associated objects.
+            >>> fs.remove_data_domain()
+            The function will remove the data domain 'sales' and all associated objects. Are you sure you want to proceed? (Y/N): Y
+            Data domain 'sales' is removed from the FeatureStore.
+            True
+        """
+        self._logger.info(f"Removing data domain '{self.__data_domain}' from repository '{self.__repo}'")
+
+        confirmation = input("The function will remove the data domain '{}' and"
+                             " all associated objects. Are you sure you want to proceed? (Y/N): ".format(self.__data_domain))
+
+        if confirmation not in ["Y", "y"]:
+            self._logger.info("Data domain removal cancelled by user.")
+            return False
+
+        # Get the views to drop related to the data domain.
+        dataset_features_df = self.__get_dataset_features_df()
+        filtered_dataset_features_df = dataset_features_df[dataset_features_df['data_domain'] == self.__data_domain].itertuples()
+        views_to_drop = list({rec.feature_view for rec in filtered_dataset_features_df})
+
+        # Get the tables to drop related to the data domain.
+        features_metadata_df = self.__get_features_metadata_df()
+        filtered_features_metadata_df = features_metadata_df[features_metadata_df['data_domain'] == self.__data_domain].itertuples()
+        tables_to_drop = list({rec.table_name for rec in filtered_features_metadata_df})
+
+        res = db_transaction(self.__remove_data_domain)()
+
+        # Drop the views related to the data domain.
+        for view in views_to_drop:
+            try:
+                execute_sql(f"DROP VIEW {_get_quoted_object_name(schema_name=self.__repo, object_name=view)}")
+            except Exception as e:
+                print(f"Error dropping view {view}: {e}")
+        # Drop the tables related to the data domain.
+        for table in tables_to_drop:
+            try:
+                execute_sql(f"DROP TABLE {_get_quoted_object_name(schema_name=self.__repo, object_name=table)}")
+            except Exception as e:
+                print(f"Error dropping table {table}: {e}")
+
+        return True
+
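Because remove_data_domain() confirms via input(), non-interactive callers such as tests or batch jobs have to supply the answer themselves; patching builtins.input is one common sketch, assuming a connected fs:

    # Sketch: answer the Y/N prompt programmatically (e.g., inside a test).
    from unittest import mock

    with mock.patch("builtins.input", return_value="Y"):
        removed = fs.remove_data_domain()   # proceeds as if the user typed Y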
+    def __remove_data_domain(self):
+        """
+        DESCRIPTION:
+            Internal method to remove the data domain from the FeatureStore and all associated objects.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            bool
+
+        RAISES:
+            TeradataMlException
+
+        EXAMPLES:
+            >>> fs.__remove_data_domain()
+        """
+        self._logger.debug(f"Starting removal of data domain '{self.__data_domain}' from repository '{self.__repo}'")
+        # To remove a data domain from the FeatureStore, we need to:
+        # 1. Remove data domain entries from the dataset catalog and dataset features.
+        # 2. Remove data domain entries from the feature metadata.
+        # 3. Remove data domain entries from the feature processes.
+        # 4. Remove data_domain entries from feature groups, group features, and their staging tables.
+        # 5. Remove data_domain entries from features and their staging tables.
+        # 6. Remove data_domain entries from entities, entity xref, and their staging tables.
+        # 7. Remove data_domain entries from data sources and their staging tables.
+        # 8. Remove data_domain entries from data_domain table.
+
+        # 1. Remove data domain entries from the dataset catalog and dataset features.
+        self._logger.debug(f"Removing data domain '{self.__data_domain}' entries from dataset catalog and dataset features")
+        _delete_data(
+            table_name=self.__table_names['dataset_catalog'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.__data_domain)
+        )
+        self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from dataset catalog")
+
+        _delete_data(
+            table_name=self.__table_names['dataset_features'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.__data_domain)
+        )
+        self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from dataset features")
+
+        # 2. Remove data domain entries from the feature metadata.
+        _delete_data(
+            table_name=self.__table_names['feature_metadata'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.__data_domain)
+        )
+        self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from feature metadata")
+
+        # 3. Remove data_domain entries from the feature processes.
+        _delete_data(
+            table_name=self.__table_names['feature_process'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.__data_domain)
+        )
+        self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from feature processes")
+
+        # 4. Remove data_domain entries from feature groups, group features, and their staging tables.
+        _delete_data(
+            table_name=self.__table_names['group_features'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("group_data_domain") == self.__data_domain)
+        )
+        self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from group features")
+
+        _delete_data(
+            table_name=self.__table_names['feature_group'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.__data_domain)
+        )
+        self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from feature groups")
+
+        _delete_data(
+            table_name=self.__table_names["group_features_staging"],
+            schema_name=self.__repo,
+            delete_conditions=(Col("group_data_domain") == self.__data_domain))
+        self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from group features staging")
+
+        _delete_data(
+            table_name=self.__table_names["feature_group_staging"],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.__data_domain)
+        )
+        self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from feature group staging")
+
+        # 5. Remove data_domain entries from features and their staging tables.
+        _delete_data(
+            table_name=self.__table_names['feature'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.data_domain)
+        )
+        self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from features")
+
+        _delete_data(
+            table_name=self.__table_names['feature_staging'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.data_domain)
+        )
+        self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from feature staging tables")
+
+        # 6. Remove data_domain entries from entities, entity xref, and their staging tables.
+        _delete_data(
+            table_name=self.__table_names['entity_xref'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.data_domain)
+        )
+        self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from entity xref")
+
+        _delete_data(
+            table_name=self.__table_names['entity'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.data_domain)
+        )
+        self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from entities")
+
+        _delete_data(
+            table_name=self.__table_names['entity_staging'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.data_domain)
+        )
+        self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from entity staging tables")
+
+        _delete_data(
+            table_name=self.__table_names['entity_staging_xref'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.data_domain)
+        )
+        self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from entity staging xref tables")
+
+        # 7. Remove data_domain entries from data sources and their staging tables.
+        _delete_data(
+            table_name=self.__table_names['data_source'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.data_domain)
+        )
+        self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from data sources")
+
+        _delete_data(
+            table_name=self.__table_names['data_source_staging'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("data_domain") == self.data_domain)
+        )
+        self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from data source staging tables")
+
+        # 8. Remove data_domain entries from data_domain table.
+        _delete_data(
+            table_name=self.__table_names['data_domain'],
+            schema_name=self.__repo,
+            delete_conditions=(Col("name") == self.__data_domain)
+        )
+        self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from data domain table")
+
+        self._logger.debug(f"Successfully completed removal of data domain '{self.__data_domain}' from repository '{self.__repo}'")
+        print(f"Data domain '{self.__data_domain}' is removed from the FeatureStore.")
+        return True
+
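The eight-step teardown above repeats one _delete_data call per table, so its shape is a data-driven loop over (table key, condition column) pairs. A compact sketch of that pattern, reusing the _delete_data and Col helpers as they are called above (the source spells each call out, which keeps every per-table debug log line explicit):

    # Sketch: the domain teardown as a loop; table keys mirror the calls above.
    # The data_domain table itself is keyed by "name" and is handled last.
    DOMAIN_TABLES = [
        ("dataset_catalog", "data_domain"), ("dataset_features", "data_domain"),
        ("feature_metadata", "data_domain"), ("feature_process", "data_domain"),
        ("group_features", "group_data_domain"), ("feature_group", "data_domain"),
        ("group_features_staging", "group_data_domain"), ("feature_group_staging", "data_domain"),
        ("feature", "data_domain"), ("feature_staging", "data_domain"),
        ("entity_xref", "data_domain"), ("entity", "data_domain"),
        ("entity_staging", "data_domain"), ("entity_staging_xref", "data_domain"),
        ("data_source", "data_domain"), ("data_source_staging", "data_domain"),
    ]

    def remove_domain_rows(table_names, repo, data_domain):
        for table_key, column in DOMAIN_TABLES:
            _delete_data(table_name=table_names[table_key],
                         schema_name=repo,
                         delete_conditions=(Col(column) == data_domain))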
+    def mind_map(self, feature_process=None):
+        """
+        DESCRIPTION:
+            Returns a visual mind map of the FeatureStore, showing data sources,
+            feature processes, feature catalog, and dataset catalog, with dependencies
+            illustrated by curves.
+            Note:
+                Works only in Jupyter Notebook or similar environments that support HTML rendering.
+
+        PARAMETERS:
+            feature_process:
+                Optional Argument.
+                Specifies the feature process to filter the mind map. When specified,
+                only the feature process and its related data sources, features, and datasets
+                are displayed.
+                Notes:
+                    * For datasets, mind_map() displays only the features associated with
+                      the specified feature process. For example, if a dataset is associated
+                      with Feature1 and Feature2, Feature1 is ingested by FeatureProcess1,
+                      and Feature2 is ingested by FeatureProcess2, then mind_map() displays
+                      the dataset with only Feature1 when "feature_process" is set to
+                      FeatureProcess1.
+                    * If "feature_process" is not specified, then mind_map() displays all the
+                      feature processes, data sources, features, and datasets in the FeatureStore.
+                Types: str OR list of str
+
+        RETURNS:
+            None (displays HTML visualization)
+
+        RAISES:
+            TypeError
+
+        EXAMPLES:
+            # Set up the data and feature processes used by the examples below.
+            >>> from teradataml import DataFrame, FeatureStore
+            >>> load_example_data("dataframe", ["sales", "admissions"])
+            # Create DataFrames.
+            >>> sales_df = DataFrame("sales")
+            >>> admissions_df = DataFrame("admissions")
+
+            # Create a FeatureStore for the repo 'vfs_v1'.
+            >>> fs = FeatureStore("vfs_v1", data_domain='Analytics')
+            FeatureStore is ready to use.
+
+            # Create a feature process to ingest sales df.
+            >>> fp1 = fs.get_feature_process(object=sales_df,
+            ...                              features=['Jan', 'Feb', 'Mar', 'Apr'],
+            ...                              entity='accounts')
+            >>> fp1.run()
+            Process '7b9f76d6-562c-11f0-bb98-c934b24a960f' started.
+            Process '7b9f76d6-562c-11f0-bb98-c934b24a960f' completed.
+            True
+
+            # Create a feature process to ingest admissions df.
+            >>> fp2 = fs.get_feature_process(object=admissions_df,
+            ...                              features=['masters', 'gpa', 'stats', 'programming', 'admitted'],
+            ...                              entity='id')
+            >>> fp2.run()
+            Process 'a5de0230-6b8e-11f0-ae70-f020ffe7fe09' started.
+            Process 'a5de0230-6b8e-11f0-ae70-f020ffe7fe09' completed.
+
+            # Example 1: Display the mind map of the FeatureStore with all feature processes.
+            >>> fs.mind_map()
+
+            # Example 2: Display the mind map of the FeatureStore for the sales feature process.
+            >>> fs.mind_map(feature_process=fp1.process_id)
+
+            # Example 3: Display the mind map of the FeatureStore for admissions features.
+            >>> fs.mind_map(feature_process=fp2.process_id)
+
+            # Example 4: Display the mind map of the FeatureStore for both sales and admissions
+            #            feature processes.
+            >>> fs.mind_map(feature_process=[fp1.process_id, fp2.process_id])
+        """
+        # Validate arguments.
+        argument_validation_params = []
+        argument_validation_params.append(["feature_process", feature_process, True, (str, list), True])
+
+        # Validate argument types.
+        _Validators._validate_function_arguments(argument_validation_params)
+
+        # 1. Declare Python variables for the mind map.
+        data_sources_ = set()
+        feature_processes_ = set()
+        features_ = set()
+        datasets_ = set()
+        data_source_map = {}
+        feature_process_map = {}
+        dataset_feature_map = {}
+
+        sql = """
+        select distinct process_id, oreplace(data_source, '"', '') as data_source, feature_names from "{}".{}
+        where data_domain = '{}'
+        """.format(self.__repo, EFS_DB_COMPONENTS['feature_process'], self.__data_domain)
+
+        # If user provides feature process, filter the SQL query.
+        if feature_process:
+            feature_process = UtilFuncs._as_list(feature_process)
+            feature_process_str = ', '.join(f"'{fp}'" for fp in feature_process)
+            sql += " and process_id in ({})".format(feature_process_str)
+
+        recs = execute_sql(sql)
+        for rec in recs:
+            process_id, data_source, feature_names = rec
+            data_sources_.add(data_source)
+            feature_processes_.add(process_id)
+            feature_names = [f.strip() for f in feature_names.split(',')]
+            features_.update(feature_names)
+
+            # Populate the maps.
+            if data_source not in data_source_map:
+                data_source_map[data_source] = []
+            data_source_map[data_source].append(process_id)
+
+            if process_id not in feature_process_map:
+                feature_process_map[process_id] = []
+            feature_process_map[process_id].extend(feature_names)
+
+        # The feature process map can have duplicates; de-duplicate the feature lists.
+        feature_process_map = {k: list(set(v)) for k, v in feature_process_map.items()}
+
+        data_sources = [{"id": ds, "label": ds} for ds in data_sources_]
+        feature_processes = [{"id": fp, "label": fp} for fp in feature_processes_]
+        features = [{"id": f, "label": f} for f in features_]
+
+        # Create datasets and dataset_feature_map.
+        ds_sql = """
+        select feature_view, feature_name from
+        "{}".{}
+        where data_domain = '{}'
+        """.format(self.__repo, EFS_DB_COMPONENTS['dataset_features'], self.__data_domain)
+
+        # If user provides a specific feature process, then show only those features in datasets.
+        if feature_process:
+            fp_str = ', '.join(f"'{fp}'" for fp in feature_process)
+            ds_sql += " and feature_version IN ({})".format(fp_str)
+
+        recs = execute_sql(ds_sql)
+        for rec in recs:
+            feature_view, feature_name = rec
+            datasets_.add(feature_view)
+            if feature_view not in dataset_feature_map:
+                dataset_feature_map[feature_view] = []
+            dataset_feature_map[feature_view].append(feature_name)
+
+        datasets = [{"id": ds, "label": ds} for ds in datasets_]
+
+        # 2. Add a unique suffix to all ids in the variables.
+        from time import time as epoch_seconds
+        suffix = f"_fs_{str(epoch_seconds()).replace('.', '_')}"
+
+        def add_suffix_to_list(lst):
+            return [dict(obj, id=obj["id"] + suffix) for obj in lst]
+
+        def add_suffix_to_dict_keys_and_values(dct):
+            return {k + suffix: [v + suffix for v in vs] for k, vs in dct.items()}
+
+        data_sources_js = add_suffix_to_list(data_sources)
+        feature_processes_js = add_suffix_to_list([obj for obj in feature_processes if not obj.get("invisible")])
+        # Keep invisible objects for completeness in features, but filter for display if needed.
+        features_js = add_suffix_to_list(features)
+        datasets_js = add_suffix_to_list(datasets)
+        data_source_map_js = add_suffix_to_dict_keys_and_values(data_source_map)
+        feature_process_map_js = add_suffix_to_dict_keys_and_values(feature_process_map)
+        dataset_feature_map_js = add_suffix_to_dict_keys_and_values(dataset_feature_map)
+
+        # 3. Prepare JS variable strings.
+        import json
+        js_data_sources = json.dumps(data_sources_js)
+        js_feature_processes = json.dumps(feature_processes_js)
+        js_features = json.dumps(features_js)
+        js_datasets = json.dumps(datasets_js)
+        js_data_source_map = json.dumps(data_source_map_js)
+        js_feature_process_map = json.dumps(feature_process_map_js)
+        js_dataset_feature_map = json.dumps(dataset_feature_map_js)
+
+        # 4. Get current GMT timestamp for display.
+        from datetime import datetime, timezone
+        gmt_now = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S GMT')
+
+        # 5. Inject the JS variables, timestamp, and feature store name into the template.
+        html_ = _TD_FS_MindMap_Template\
+            .replace("__DATA_SOURCES__", js_data_sources) \
+            .replace("__FEATURE_PROCESSES__", js_feature_processes) \
+            .replace("__FEATURES__", js_features) \
+            .replace("__DATASETS__", js_datasets) \
+            .replace("__DATA_SOURCE_MAP__", js_data_source_map) \
+            .replace("__FEATURE_PROCESS_MAP__", js_feature_process_map) \
+            .replace("__DATASET_FEATURE_MAP__", js_dataset_feature_map) \
+            .replace("__MINDMAP_TIMESTAMP__", gmt_now) \
+            .replace("__REPO__", self.__repo)\
+            .replace("__DATA_DOMAIN__", self.__data_domain)
+
+        # 6. Add the unique suffix to all element IDs in the HTML/JS.
+        html_ = html_.replace("_fs_i", suffix)
+
+        from IPython.display import display, HTML
+        display(HTML(html_))
+
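The epoch-based suffix exists so that several mind maps rendered in one notebook session do not collide on HTML element ids; every node id and every key and value in the edge maps receive the same suffix, so the links between them still line up. The two helpers are pure and easy to verify standalone:

    # Sketch: suffixing node ids and map keys/values the way mind_map() does.
    from time import time

    suffix = f"_fs_{str(time()).replace('.', '_')}"

    nodes = [{"id": "sales", "label": "sales"}]
    edges = {"sales": ["proc-1", "proc-2"]}

    suffixed_nodes = [dict(obj, id=obj["id"] + suffix) for obj in nodes]
    suffixed_edges = {k + suffix: [v + suffix for v in vs] for k, vs in edges.items()}

    print(suffixed_nodes[0]["id"])     # e.g. 'sales_fs_1753680000_123456'
    print(list(suffixed_edges)[0])     # carries the same suffix, so links match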