teradataml-20.0.0.8-py3-none-any.whl
This diff shows the contents of a publicly available package version as released to a supported registry. The information is provided for informational purposes only and reflects the changes between package versions as they appear in the public registry.
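The manifest below covers the whole wheel: connection/context handling (`teradataml/context`), SQLE/UAF/BYOM analytic-function wrappers (`teradataml/analytics`, `teradataml/catalog`), AutoML (`teradataml/automl`), and the bundled example datasets and function-metadata JSONs (`teradataml/data`). As a rough orientation only, here is a minimal sketch of how these pieces are typically exercised together; it is not part of the diff, the host and credentials are placeholders, and it assumes a reachable Vantage system. `create_context`, `remove_context`, `load_example_data`, and `DataFrame` are public teradataml APIs (see `teradataml/context/context.py` and `teradataml/data/load_example_data.py` in the listing).

```python
# Minimal illustrative sketch (assumptions: reachable Vantage system,
# placeholder host/credentials). Not part of the package diff.
from teradataml import create_context, remove_context, load_example_data, DataFrame

# Establish a connection context to a Teradata Vantage system.
con = create_context(host="<hostname>", username="<user>", password="<password>")

# Load one of the example tables bundled with the wheel
# (admissions_train.csv appears in teradataml/data below).
load_example_data("dataframe", "admissions_train")

# Create a teradataml DataFrame backed by the loaded table and inspect it.
df = DataFrame("admissions_train")
print(df.head())

# Release the connection when done.
remove_context()
```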
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +2762 -0
- teradataml/__init__.py +78 -0
- teradataml/_version.py +11 -0
- teradataml/analytics/Transformations.py +2996 -0
- teradataml/analytics/__init__.py +82 -0
- teradataml/analytics/analytic_function_executor.py +2416 -0
- teradataml/analytics/analytic_query_generator.py +1050 -0
- teradataml/analytics/byom/H2OPredict.py +514 -0
- teradataml/analytics/byom/PMMLPredict.py +437 -0
- teradataml/analytics/byom/__init__.py +16 -0
- teradataml/analytics/json_parser/__init__.py +133 -0
- teradataml/analytics/json_parser/analytic_functions_argument.py +1805 -0
- teradataml/analytics/json_parser/json_store.py +191 -0
- teradataml/analytics/json_parser/metadata.py +1666 -0
- teradataml/analytics/json_parser/utils.py +805 -0
- teradataml/analytics/meta_class.py +236 -0
- teradataml/analytics/sqle/DecisionTreePredict.py +456 -0
- teradataml/analytics/sqle/NaiveBayesPredict.py +420 -0
- teradataml/analytics/sqle/__init__.py +128 -0
- teradataml/analytics/sqle/json/decisiontreepredict_sqle.json +78 -0
- teradataml/analytics/sqle/json/naivebayespredict_sqle.json +62 -0
- teradataml/analytics/table_operator/__init__.py +11 -0
- teradataml/analytics/uaf/__init__.py +82 -0
- teradataml/analytics/utils.py +828 -0
- teradataml/analytics/valib.py +1617 -0
- teradataml/automl/__init__.py +5835 -0
- teradataml/automl/autodataprep/__init__.py +493 -0
- teradataml/automl/custom_json_utils.py +1625 -0
- teradataml/automl/data_preparation.py +1384 -0
- teradataml/automl/data_transformation.py +1254 -0
- teradataml/automl/feature_engineering.py +2273 -0
- teradataml/automl/feature_exploration.py +1873 -0
- teradataml/automl/model_evaluation.py +488 -0
- teradataml/automl/model_training.py +1407 -0
- teradataml/catalog/__init__.py +2 -0
- teradataml/catalog/byom.py +1759 -0
- teradataml/catalog/function_argument_mapper.py +859 -0
- teradataml/catalog/model_cataloging_utils.py +491 -0
- teradataml/clients/__init__.py +0 -0
- teradataml/clients/auth_client.py +137 -0
- teradataml/clients/keycloak_client.py +165 -0
- teradataml/clients/pkce_client.py +481 -0
- teradataml/common/__init__.py +1 -0
- teradataml/common/aed_utils.py +2078 -0
- teradataml/common/bulk_exposed_utils.py +113 -0
- teradataml/common/constants.py +1669 -0
- teradataml/common/deprecations.py +166 -0
- teradataml/common/exceptions.py +147 -0
- teradataml/common/formula.py +743 -0
- teradataml/common/garbagecollector.py +666 -0
- teradataml/common/logger.py +1261 -0
- teradataml/common/messagecodes.py +518 -0
- teradataml/common/messages.py +262 -0
- teradataml/common/pylogger.py +67 -0
- teradataml/common/sqlbundle.py +764 -0
- teradataml/common/td_coltype_code_to_tdtype.py +48 -0
- teradataml/common/utils.py +3166 -0
- teradataml/common/warnings.py +36 -0
- teradataml/common/wrapper_utils.py +625 -0
- teradataml/config/__init__.py +0 -0
- teradataml/config/dummy_file1.cfg +5 -0
- teradataml/config/dummy_file2.cfg +3 -0
- teradataml/config/sqlengine_alias_definitions_v1.0 +14 -0
- teradataml/config/sqlengine_alias_definitions_v1.1 +20 -0
- teradataml/config/sqlengine_alias_definitions_v1.3 +19 -0
- teradataml/context/__init__.py +0 -0
- teradataml/context/aed_context.py +223 -0
- teradataml/context/context.py +1462 -0
- teradataml/data/A_loan.csv +19 -0
- teradataml/data/BINARY_REALS_LEFT.csv +11 -0
- teradataml/data/BINARY_REALS_RIGHT.csv +11 -0
- teradataml/data/B_loan.csv +49 -0
- teradataml/data/BuoyData2.csv +17 -0
- teradataml/data/CONVOLVE2_COMPLEX_LEFT.csv +5 -0
- teradataml/data/CONVOLVE2_COMPLEX_RIGHT.csv +5 -0
- teradataml/data/Convolve2RealsLeft.csv +5 -0
- teradataml/data/Convolve2RealsRight.csv +5 -0
- teradataml/data/Convolve2ValidLeft.csv +11 -0
- teradataml/data/Convolve2ValidRight.csv +11 -0
- teradataml/data/DFFTConv_Real_8_8.csv +65 -0
- teradataml/data/Employee.csv +5 -0
- teradataml/data/Employee_Address.csv +4 -0
- teradataml/data/Employee_roles.csv +5 -0
- teradataml/data/JulesBelvezeDummyData.csv +100 -0
- teradataml/data/Mall_customer_data.csv +201 -0
- teradataml/data/Orders1_12mf.csv +25 -0
- teradataml/data/Pi_loan.csv +7 -0
- teradataml/data/SMOOTHED_DATA.csv +7 -0
- teradataml/data/TestDFFT8.csv +9 -0
- teradataml/data/TestRiver.csv +109 -0
- teradataml/data/Traindata.csv +28 -0
- teradataml/data/__init__.py +0 -0
- teradataml/data/acf.csv +17 -0
- teradataml/data/adaboost_example.json +34 -0
- teradataml/data/adaboostpredict_example.json +24 -0
- teradataml/data/additional_table.csv +11 -0
- teradataml/data/admissions_test.csv +21 -0
- teradataml/data/admissions_train.csv +41 -0
- teradataml/data/admissions_train_nulls.csv +41 -0
- teradataml/data/advertising.csv +201 -0
- teradataml/data/ageandheight.csv +13 -0
- teradataml/data/ageandpressure.csv +31 -0
- teradataml/data/amazon_reviews_25.csv +26 -0
- teradataml/data/antiselect_example.json +36 -0
- teradataml/data/antiselect_input.csv +8 -0
- teradataml/data/antiselect_input_mixed_case.csv +8 -0
- teradataml/data/applicant_external.csv +7 -0
- teradataml/data/applicant_reference.csv +7 -0
- teradataml/data/apriori_example.json +22 -0
- teradataml/data/arima_example.json +9 -0
- teradataml/data/assortedtext_input.csv +8 -0
- teradataml/data/attribution_example.json +34 -0
- teradataml/data/attribution_sample_table.csv +27 -0
- teradataml/data/attribution_sample_table1.csv +6 -0
- teradataml/data/attribution_sample_table2.csv +11 -0
- teradataml/data/bank_churn.csv +10001 -0
- teradataml/data/bank_marketing.csv +11163 -0
- teradataml/data/bank_web_clicks1.csv +43 -0
- teradataml/data/bank_web_clicks2.csv +91 -0
- teradataml/data/bank_web_url.csv +85 -0
- teradataml/data/barrier.csv +2 -0
- teradataml/data/barrier_new.csv +3 -0
- teradataml/data/betweenness_example.json +14 -0
- teradataml/data/bike_sharing.csv +732 -0
- teradataml/data/bin_breaks.csv +8 -0
- teradataml/data/bin_fit_ip.csv +4 -0
- teradataml/data/binary_complex_left.csv +11 -0
- teradataml/data/binary_complex_right.csv +11 -0
- teradataml/data/binary_matrix_complex_left.csv +21 -0
- teradataml/data/binary_matrix_complex_right.csv +21 -0
- teradataml/data/binary_matrix_real_left.csv +21 -0
- teradataml/data/binary_matrix_real_right.csv +21 -0
- teradataml/data/blood2ageandweight.csv +26 -0
- teradataml/data/bmi.csv +501 -0
- teradataml/data/boston.csv +507 -0
- teradataml/data/boston2cols.csv +721 -0
- teradataml/data/breast_cancer.csv +570 -0
- teradataml/data/buoydata_mix.csv +11 -0
- teradataml/data/burst_data.csv +5 -0
- teradataml/data/burst_example.json +21 -0
- teradataml/data/byom_example.json +34 -0
- teradataml/data/bytes_table.csv +4 -0
- teradataml/data/cal_housing_ex_raw.csv +70 -0
- teradataml/data/callers.csv +7 -0
- teradataml/data/calls.csv +10 -0
- teradataml/data/cars_hist.csv +33 -0
- teradataml/data/cat_table.csv +25 -0
- teradataml/data/ccm_example.json +32 -0
- teradataml/data/ccm_input.csv +91 -0
- teradataml/data/ccm_input2.csv +13 -0
- teradataml/data/ccmexample.csv +101 -0
- teradataml/data/ccmprepare_example.json +9 -0
- teradataml/data/ccmprepare_input.csv +91 -0
- teradataml/data/cfilter_example.json +12 -0
- teradataml/data/changepointdetection_example.json +18 -0
- teradataml/data/changepointdetectionrt_example.json +8 -0
- teradataml/data/chi_sq.csv +3 -0
- teradataml/data/churn_data.csv +14 -0
- teradataml/data/churn_emission.csv +35 -0
- teradataml/data/churn_initial.csv +3 -0
- teradataml/data/churn_state_transition.csv +5 -0
- teradataml/data/citedges_2.csv +745 -0
- teradataml/data/citvertices_2.csv +1210 -0
- teradataml/data/clicks2.csv +16 -0
- teradataml/data/clickstream.csv +13 -0
- teradataml/data/clickstream1.csv +11 -0
- teradataml/data/closeness_example.json +16 -0
- teradataml/data/complaints.csv +21 -0
- teradataml/data/complaints_mini.csv +3 -0
- teradataml/data/complaints_test_tokenized.csv +353 -0
- teradataml/data/complaints_testtoken.csv +224 -0
- teradataml/data/complaints_tokens_model.csv +348 -0
- teradataml/data/complaints_tokens_test.csv +353 -0
- teradataml/data/complaints_traintoken.csv +472 -0
- teradataml/data/computers_category.csv +1001 -0
- teradataml/data/computers_test1.csv +1252 -0
- teradataml/data/computers_train1.csv +5009 -0
- teradataml/data/computers_train1_clustered.csv +5009 -0
- teradataml/data/confusionmatrix_example.json +9 -0
- teradataml/data/conversion_event_table.csv +3 -0
- teradataml/data/corr_input.csv +17 -0
- teradataml/data/correlation_example.json +11 -0
- teradataml/data/covid_confirm_sd.csv +83 -0
- teradataml/data/coxhazardratio_example.json +39 -0
- teradataml/data/coxph_example.json +15 -0
- teradataml/data/coxsurvival_example.json +28 -0
- teradataml/data/cpt.csv +41 -0
- teradataml/data/credit_ex_merged.csv +45 -0
- teradataml/data/creditcard_data.csv +1001 -0
- teradataml/data/customer_loyalty.csv +301 -0
- teradataml/data/customer_loyalty_newseq.csv +31 -0
- teradataml/data/customer_segmentation_test.csv +2628 -0
- teradataml/data/customer_segmentation_train.csv +8069 -0
- teradataml/data/dataframe_example.json +173 -0
- teradataml/data/decisionforest_example.json +37 -0
- teradataml/data/decisionforestpredict_example.json +38 -0
- teradataml/data/decisiontree_example.json +21 -0
- teradataml/data/decisiontreepredict_example.json +45 -0
- teradataml/data/dfft2_size4_real.csv +17 -0
- teradataml/data/dfft2_test_matrix16.csv +17 -0
- teradataml/data/dfft2conv_real_4_4.csv +65 -0
- teradataml/data/diabetes.csv +443 -0
- teradataml/data/diabetes_test.csv +89 -0
- teradataml/data/dict_table.csv +5 -0
- teradataml/data/docperterm_table.csv +4 -0
- teradataml/data/docs/__init__.py +1 -0
- teradataml/data/docs/byom/__init__.py +0 -0
- teradataml/data/docs/byom/docs/DataRobotPredict.py +180 -0
- teradataml/data/docs/byom/docs/DataikuPredict.py +217 -0
- teradataml/data/docs/byom/docs/H2OPredict.py +325 -0
- teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
- teradataml/data/docs/byom/docs/ONNXPredict.py +283 -0
- teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
- teradataml/data/docs/byom/docs/PMMLPredict.py +278 -0
- teradataml/data/docs/byom/docs/__init__.py +0 -0
- teradataml/data/docs/sqle/__init__.py +0 -0
- teradataml/data/docs/sqle/docs_17_10/Antiselect.py +83 -0
- teradataml/data/docs/sqle/docs_17_10/Attribution.py +200 -0
- teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +172 -0
- teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +131 -0
- teradataml/data/docs/sqle/docs_17_10/CategoricalSummary.py +86 -0
- teradataml/data/docs/sqle/docs_17_10/ChiSq.py +90 -0
- teradataml/data/docs/sqle/docs_17_10/ColumnSummary.py +86 -0
- teradataml/data/docs/sqle/docs_17_10/ConvertTo.py +96 -0
- teradataml/data/docs/sqle/docs_17_10/DecisionForestPredict.py +139 -0
- teradataml/data/docs/sqle/docs_17_10/DecisionTreePredict.py +152 -0
- teradataml/data/docs/sqle/docs_17_10/FTest.py +161 -0
- teradataml/data/docs/sqle/docs_17_10/FillRowId.py +83 -0
- teradataml/data/docs/sqle/docs_17_10/Fit.py +88 -0
- teradataml/data/docs/sqle/docs_17_10/GLMPredict.py +144 -0
- teradataml/data/docs/sqle/docs_17_10/GetRowsWithMissingValues.py +85 -0
- teradataml/data/docs/sqle/docs_17_10/GetRowsWithoutMissingValues.py +82 -0
- teradataml/data/docs/sqle/docs_17_10/Histogram.py +165 -0
- teradataml/data/docs/sqle/docs_17_10/MovingAverage.py +134 -0
- teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +209 -0
- teradataml/data/docs/sqle/docs_17_10/NPath.py +266 -0
- teradataml/data/docs/sqle/docs_17_10/NaiveBayesPredict.py +116 -0
- teradataml/data/docs/sqle/docs_17_10/NaiveBayesTextClassifierPredict.py +176 -0
- teradataml/data/docs/sqle/docs_17_10/NumApply.py +147 -0
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +135 -0
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +109 -0
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterFit.py +166 -0
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +105 -0
- teradataml/data/docs/sqle/docs_17_10/Pack.py +128 -0
- teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesFit.py +112 -0
- teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +102 -0
- teradataml/data/docs/sqle/docs_17_10/QQNorm.py +105 -0
- teradataml/data/docs/sqle/docs_17_10/RoundColumns.py +110 -0
- teradataml/data/docs/sqle/docs_17_10/RowNormalizeFit.py +118 -0
- teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +99 -0
- teradataml/data/docs/sqle/docs_17_10/SVMSparsePredict.py +153 -0
- teradataml/data/docs/sqle/docs_17_10/ScaleFit.py +197 -0
- teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +99 -0
- teradataml/data/docs/sqle/docs_17_10/Sessionize.py +114 -0
- teradataml/data/docs/sqle/docs_17_10/SimpleImputeFit.py +116 -0
- teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +98 -0
- teradataml/data/docs/sqle/docs_17_10/StrApply.py +187 -0
- teradataml/data/docs/sqle/docs_17_10/StringSimilarity.py +146 -0
- teradataml/data/docs/sqle/docs_17_10/Transform.py +105 -0
- teradataml/data/docs/sqle/docs_17_10/UnivariateStatistics.py +142 -0
- teradataml/data/docs/sqle/docs_17_10/Unpack.py +214 -0
- teradataml/data/docs/sqle/docs_17_10/WhichMax.py +83 -0
- teradataml/data/docs/sqle/docs_17_10/WhichMin.py +83 -0
- teradataml/data/docs/sqle/docs_17_10/ZTest.py +155 -0
- teradataml/data/docs/sqle/docs_17_10/__init__.py +0 -0
- teradataml/data/docs/sqle/docs_17_20/ANOVA.py +186 -0
- teradataml/data/docs/sqle/docs_17_20/Antiselect.py +83 -0
- teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
- teradataml/data/docs/sqle/docs_17_20/Attribution.py +201 -0
- teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +172 -0
- teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +139 -0
- teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
- teradataml/data/docs/sqle/docs_17_20/CategoricalSummary.py +86 -0
- teradataml/data/docs/sqle/docs_17_20/ChiSq.py +90 -0
- teradataml/data/docs/sqle/docs_17_20/ClassificationEvaluator.py +166 -0
- teradataml/data/docs/sqle/docs_17_20/ColumnSummary.py +86 -0
- teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +246 -0
- teradataml/data/docs/sqle/docs_17_20/ConvertTo.py +113 -0
- teradataml/data/docs/sqle/docs_17_20/DecisionForest.py +280 -0
- teradataml/data/docs/sqle/docs_17_20/DecisionForestPredict.py +144 -0
- teradataml/data/docs/sqle/docs_17_20/DecisionTreePredict.py +136 -0
- teradataml/data/docs/sqle/docs_17_20/FTest.py +240 -0
- teradataml/data/docs/sqle/docs_17_20/FillRowId.py +83 -0
- teradataml/data/docs/sqle/docs_17_20/Fit.py +88 -0
- teradataml/data/docs/sqle/docs_17_20/GLM.py +541 -0
- teradataml/data/docs/sqle/docs_17_20/GLMPerSegment.py +415 -0
- teradataml/data/docs/sqle/docs_17_20/GLMPredict.py +144 -0
- teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +233 -0
- teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +125 -0
- teradataml/data/docs/sqle/docs_17_20/GetRowsWithMissingValues.py +109 -0
- teradataml/data/docs/sqle/docs_17_20/GetRowsWithoutMissingValues.py +106 -0
- teradataml/data/docs/sqle/docs_17_20/Histogram.py +224 -0
- teradataml/data/docs/sqle/docs_17_20/KMeans.py +251 -0
- teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +144 -0
- teradataml/data/docs/sqle/docs_17_20/KNN.py +215 -0
- teradataml/data/docs/sqle/docs_17_20/MovingAverage.py +134 -0
- teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
- teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +209 -0
- teradataml/data/docs/sqle/docs_17_20/NPath.py +266 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesPredict.py +116 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +177 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +127 -0
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +119 -0
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +112 -0
- teradataml/data/docs/sqle/docs_17_20/NumApply.py +147 -0
- teradataml/data/docs/sqle/docs_17_20/OneClassSVM.py +307 -0
- teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +185 -0
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +231 -0
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +121 -0
- teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingFit.py +220 -0
- teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingTransform.py +127 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +191 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +117 -0
- teradataml/data/docs/sqle/docs_17_20/Pack.py +128 -0
- teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
- teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesFit.py +112 -0
- teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +112 -0
- teradataml/data/docs/sqle/docs_17_20/QQNorm.py +105 -0
- teradataml/data/docs/sqle/docs_17_20/ROC.py +164 -0
- teradataml/data/docs/sqle/docs_17_20/RandomProjectionFit.py +155 -0
- teradataml/data/docs/sqle/docs_17_20/RandomProjectionMinComponents.py +106 -0
- teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +120 -0
- teradataml/data/docs/sqle/docs_17_20/RegressionEvaluator.py +211 -0
- teradataml/data/docs/sqle/docs_17_20/RoundColumns.py +109 -0
- teradataml/data/docs/sqle/docs_17_20/RowNormalizeFit.py +118 -0
- teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +111 -0
- teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
- teradataml/data/docs/sqle/docs_17_20/SVM.py +414 -0
- teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +213 -0
- teradataml/data/docs/sqle/docs_17_20/SVMSparsePredict.py +153 -0
- teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +315 -0
- teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +202 -0
- teradataml/data/docs/sqle/docs_17_20/SentimentExtractor.py +206 -0
- teradataml/data/docs/sqle/docs_17_20/Sessionize.py +114 -0
- teradataml/data/docs/sqle/docs_17_20/Shap.py +225 -0
- teradataml/data/docs/sqle/docs_17_20/Silhouette.py +153 -0
- teradataml/data/docs/sqle/docs_17_20/SimpleImputeFit.py +116 -0
- teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +109 -0
- teradataml/data/docs/sqle/docs_17_20/StrApply.py +187 -0
- teradataml/data/docs/sqle/docs_17_20/StringSimilarity.py +146 -0
- teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +207 -0
- teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +333 -0
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
- teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/TargetEncodingFit.py +267 -0
- teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +141 -0
- teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +224 -0
- teradataml/data/docs/sqle/docs_17_20/TrainTestSplit.py +160 -0
- teradataml/data/docs/sqle/docs_17_20/Transform.py +123 -0
- teradataml/data/docs/sqle/docs_17_20/UnivariateStatistics.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/Unpack.py +214 -0
- teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
- teradataml/data/docs/sqle/docs_17_20/VectorDistance.py +169 -0
- teradataml/data/docs/sqle/docs_17_20/WhichMax.py +83 -0
- teradataml/data/docs/sqle/docs_17_20/WhichMin.py +83 -0
- teradataml/data/docs/sqle/docs_17_20/WordEmbeddings.py +237 -0
- teradataml/data/docs/sqle/docs_17_20/XGBoost.py +362 -0
- teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +281 -0
- teradataml/data/docs/sqle/docs_17_20/ZTest.py +220 -0
- teradataml/data/docs/sqle/docs_17_20/__init__.py +0 -0
- teradataml/data/docs/tableoperator/__init__.py +0 -0
- teradataml/data/docs/tableoperator/docs_17_00/ReadNOS.py +430 -0
- teradataml/data/docs/tableoperator/docs_17_00/__init__.py +0 -0
- teradataml/data/docs/tableoperator/docs_17_05/ReadNOS.py +430 -0
- teradataml/data/docs/tableoperator/docs_17_05/WriteNOS.py +348 -0
- teradataml/data/docs/tableoperator/docs_17_05/__init__.py +0 -0
- teradataml/data/docs/tableoperator/docs_17_10/ReadNOS.py +429 -0
- teradataml/data/docs/tableoperator/docs_17_10/WriteNOS.py +348 -0
- teradataml/data/docs/tableoperator/docs_17_10/__init__.py +0 -0
- teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
- teradataml/data/docs/tableoperator/docs_17_20/ReadNOS.py +440 -0
- teradataml/data/docs/tableoperator/docs_17_20/WriteNOS.py +387 -0
- teradataml/data/docs/tableoperator/docs_17_20/__init__.py +0 -0
- teradataml/data/docs/uaf/__init__.py +0 -0
- teradataml/data/docs/uaf/docs_17_20/ACF.py +186 -0
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +370 -0
- teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +172 -0
- teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +161 -0
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
- teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
- teradataml/data/docs/uaf/docs_17_20/BinaryMatrixOp.py +248 -0
- teradataml/data/docs/uaf/docs_17_20/BinarySeriesOp.py +252 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +178 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +175 -0
- teradataml/data/docs/uaf/docs_17_20/Convolve.py +230 -0
- teradataml/data/docs/uaf/docs_17_20/Convolve2.py +218 -0
- teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +185 -0
- teradataml/data/docs/uaf/docs_17_20/DFFT.py +204 -0
- teradataml/data/docs/uaf/docs_17_20/DFFT2.py +216 -0
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +216 -0
- teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +192 -0
- teradataml/data/docs/uaf/docs_17_20/DIFF.py +175 -0
- teradataml/data/docs/uaf/docs_17_20/DTW.py +180 -0
- teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +217 -0
- teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +142 -0
- teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +184 -0
- teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +185 -0
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
- teradataml/data/docs/uaf/docs_17_20/FitMetrics.py +172 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesFormula.py +206 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +143 -0
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +198 -0
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +260 -0
- teradataml/data/docs/uaf/docs_17_20/IDFFT.py +165 -0
- teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +191 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
- teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/InputValidator.py +121 -0
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +156 -0
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +215 -0
- teradataml/data/docs/uaf/docs_17_20/MAMean.py +174 -0
- teradataml/data/docs/uaf/docs_17_20/MInfo.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
- teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +191 -0
- teradataml/data/docs/uaf/docs_17_20/PACF.py +157 -0
- teradataml/data/docs/uaf/docs_17_20/Portman.py +217 -0
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +203 -0
- teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +155 -0
- teradataml/data/docs/uaf/docs_17_20/Resample.py +237 -0
- teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
- teradataml/data/docs/uaf/docs_17_20/SInfo.py +123 -0
- teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +173 -0
- teradataml/data/docs/uaf/docs_17_20/SelectionCriteria.py +174 -0
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +171 -0
- teradataml/data/docs/uaf/docs_17_20/SignifResidmean.py +164 -0
- teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +180 -0
- teradataml/data/docs/uaf/docs_17_20/Smoothma.py +208 -0
- teradataml/data/docs/uaf/docs_17_20/TrackingOp.py +151 -0
- teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +171 -0
- teradataml/data/docs/uaf/docs_17_20/Unnormalize.py +202 -0
- teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +171 -0
- teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
- teradataml/data/docs/uaf/docs_17_20/__init__.py +0 -0
- teradataml/data/dtw_example.json +18 -0
- teradataml/data/dtw_t1.csv +11 -0
- teradataml/data/dtw_t2.csv +4 -0
- teradataml/data/dwt2d_dataTable.csv +65 -0
- teradataml/data/dwt2d_example.json +16 -0
- teradataml/data/dwt_dataTable.csv +8 -0
- teradataml/data/dwt_example.json +15 -0
- teradataml/data/dwt_filterTable.csv +3 -0
- teradataml/data/dwt_filter_dim.csv +5 -0
- teradataml/data/emission.csv +9 -0
- teradataml/data/emp_table_by_dept.csv +19 -0
- teradataml/data/employee_info.csv +4 -0
- teradataml/data/employee_table.csv +6 -0
- teradataml/data/excluding_event_table.csv +2 -0
- teradataml/data/finance_data.csv +6 -0
- teradataml/data/finance_data2.csv +61 -0
- teradataml/data/finance_data3.csv +93 -0
- teradataml/data/finance_data4.csv +13 -0
- teradataml/data/fish.csv +160 -0
- teradataml/data/fm_blood2ageandweight.csv +26 -0
- teradataml/data/fmeasure_example.json +12 -0
- teradataml/data/followers_leaders.csv +10 -0
- teradataml/data/fpgrowth_example.json +12 -0
- teradataml/data/frequentpaths_example.json +29 -0
- teradataml/data/friends.csv +9 -0
- teradataml/data/fs_input.csv +33 -0
- teradataml/data/fs_input1.csv +33 -0
- teradataml/data/genData.csv +513 -0
- teradataml/data/geodataframe_example.json +40 -0
- teradataml/data/glass_types.csv +215 -0
- teradataml/data/glm_admissions_model.csv +12 -0
- teradataml/data/glm_example.json +56 -0
- teradataml/data/glml1l2_example.json +28 -0
- teradataml/data/glml1l2predict_example.json +54 -0
- teradataml/data/glmpredict_example.json +54 -0
- teradataml/data/gq_t1.csv +21 -0
- teradataml/data/grocery_transaction.csv +19 -0
- teradataml/data/hconvolve_complex_right.csv +5 -0
- teradataml/data/hconvolve_complex_rightmulti.csv +5 -0
- teradataml/data/histogram_example.json +12 -0
- teradataml/data/hmmdecoder_example.json +79 -0
- teradataml/data/hmmevaluator_example.json +25 -0
- teradataml/data/hmmsupervised_example.json +10 -0
- teradataml/data/hmmunsupervised_example.json +8 -0
- teradataml/data/hnsw_alter_data.csv +5 -0
- teradataml/data/hnsw_data.csv +10 -0
- teradataml/data/house_values.csv +12 -0
- teradataml/data/house_values2.csv +13 -0
- teradataml/data/housing_cat.csv +7 -0
- teradataml/data/housing_data.csv +9 -0
- teradataml/data/housing_test.csv +47 -0
- teradataml/data/housing_test_binary.csv +47 -0
- teradataml/data/housing_train.csv +493 -0
- teradataml/data/housing_train_attribute.csv +5 -0
- teradataml/data/housing_train_binary.csv +437 -0
- teradataml/data/housing_train_parameter.csv +2 -0
- teradataml/data/housing_train_response.csv +493 -0
- teradataml/data/housing_train_segment.csv +201 -0
- teradataml/data/ibm_stock.csv +370 -0
- teradataml/data/ibm_stock1.csv +370 -0
- teradataml/data/identitymatch_example.json +22 -0
- teradataml/data/idf_table.csv +4 -0
- teradataml/data/idwt2d_dataTable.csv +5 -0
- teradataml/data/idwt_dataTable.csv +8 -0
- teradataml/data/idwt_filterTable.csv +3 -0
- teradataml/data/impressions.csv +101 -0
- teradataml/data/inflation.csv +21 -0
- teradataml/data/initial.csv +3 -0
- teradataml/data/insect2Cols.csv +61 -0
- teradataml/data/insect_sprays.csv +13 -0
- teradataml/data/insurance.csv +1339 -0
- teradataml/data/interpolator_example.json +13 -0
- teradataml/data/interval_data.csv +5 -0
- teradataml/data/iris_altinput.csv +481 -0
- teradataml/data/iris_attribute_output.csv +8 -0
- teradataml/data/iris_attribute_test.csv +121 -0
- teradataml/data/iris_attribute_train.csv +481 -0
- teradataml/data/iris_category_expect_predict.csv +31 -0
- teradataml/data/iris_data.csv +151 -0
- teradataml/data/iris_input.csv +151 -0
- teradataml/data/iris_response_train.csv +121 -0
- teradataml/data/iris_test.csv +31 -0
- teradataml/data/iris_train.csv +121 -0
- teradataml/data/join_table1.csv +4 -0
- teradataml/data/join_table2.csv +4 -0
- teradataml/data/jsons/anly_function_name.json +7 -0
- teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
- teradataml/data/jsons/byom/dataikupredict.json +148 -0
- teradataml/data/jsons/byom/datarobotpredict.json +147 -0
- teradataml/data/jsons/byom/h2opredict.json +195 -0
- teradataml/data/jsons/byom/onnxembeddings.json +267 -0
- teradataml/data/jsons/byom/onnxpredict.json +187 -0
- teradataml/data/jsons/byom/pmmlpredict.json +147 -0
- teradataml/data/jsons/paired_functions.json +450 -0
- teradataml/data/jsons/sqle/16.20/Antiselect.json +56 -0
- teradataml/data/jsons/sqle/16.20/Attribution.json +249 -0
- teradataml/data/jsons/sqle/16.20/DecisionForestPredict.json +156 -0
- teradataml/data/jsons/sqle/16.20/DecisionTreePredict.json +170 -0
- teradataml/data/jsons/sqle/16.20/GLMPredict.json +122 -0
- teradataml/data/jsons/sqle/16.20/MovingAverage.json +367 -0
- teradataml/data/jsons/sqle/16.20/NGramSplitter.json +239 -0
- teradataml/data/jsons/sqle/16.20/NaiveBayesPredict.json +136 -0
- teradataml/data/jsons/sqle/16.20/NaiveBayesTextClassifierPredict.json +235 -0
- teradataml/data/jsons/sqle/16.20/Pack.json +98 -0
- teradataml/data/jsons/sqle/16.20/SVMSparsePredict.json +162 -0
- teradataml/data/jsons/sqle/16.20/Sessionize.json +105 -0
- teradataml/data/jsons/sqle/16.20/StringSimilarity.json +86 -0
- teradataml/data/jsons/sqle/16.20/Unpack.json +166 -0
- teradataml/data/jsons/sqle/16.20/nPath.json +269 -0
- teradataml/data/jsons/sqle/17.00/Antiselect.json +56 -0
- teradataml/data/jsons/sqle/17.00/Attribution.json +249 -0
- teradataml/data/jsons/sqle/17.00/DecisionForestPredict.json +156 -0
- teradataml/data/jsons/sqle/17.00/DecisionTreePredict.json +170 -0
- teradataml/data/jsons/sqle/17.00/GLMPredict.json +122 -0
- teradataml/data/jsons/sqle/17.00/MovingAverage.json +367 -0
- teradataml/data/jsons/sqle/17.00/NGramSplitter.json +239 -0
- teradataml/data/jsons/sqle/17.00/NaiveBayesPredict.json +136 -0
- teradataml/data/jsons/sqle/17.00/NaiveBayesTextClassifierPredict.json +235 -0
- teradataml/data/jsons/sqle/17.00/Pack.json +98 -0
- teradataml/data/jsons/sqle/17.00/SVMSparsePredict.json +162 -0
- teradataml/data/jsons/sqle/17.00/Sessionize.json +105 -0
- teradataml/data/jsons/sqle/17.00/StringSimilarity.json +86 -0
- teradataml/data/jsons/sqle/17.00/Unpack.json +166 -0
- teradataml/data/jsons/sqle/17.00/nPath.json +269 -0
- teradataml/data/jsons/sqle/17.05/Antiselect.json +56 -0
- teradataml/data/jsons/sqle/17.05/Attribution.json +249 -0
- teradataml/data/jsons/sqle/17.05/DecisionForestPredict.json +156 -0
- teradataml/data/jsons/sqle/17.05/DecisionTreePredict.json +170 -0
- teradataml/data/jsons/sqle/17.05/GLMPredict.json +122 -0
- teradataml/data/jsons/sqle/17.05/MovingAverage.json +367 -0
- teradataml/data/jsons/sqle/17.05/NGramSplitter.json +239 -0
- teradataml/data/jsons/sqle/17.05/NaiveBayesPredict.json +136 -0
- teradataml/data/jsons/sqle/17.05/NaiveBayesTextClassifierPredict.json +235 -0
- teradataml/data/jsons/sqle/17.05/Pack.json +98 -0
- teradataml/data/jsons/sqle/17.05/SVMSparsePredict.json +162 -0
- teradataml/data/jsons/sqle/17.05/Sessionize.json +105 -0
- teradataml/data/jsons/sqle/17.05/StringSimilarity.json +86 -0
- teradataml/data/jsons/sqle/17.05/Unpack.json +166 -0
- teradataml/data/jsons/sqle/17.05/nPath.json +269 -0
- teradataml/data/jsons/sqle/17.10/Antiselect.json +56 -0
- teradataml/data/jsons/sqle/17.10/Attribution.json +249 -0
- teradataml/data/jsons/sqle/17.10/DecisionForestPredict.json +185 -0
- teradataml/data/jsons/sqle/17.10/DecisionTreePredict.json +172 -0
- teradataml/data/jsons/sqle/17.10/GLMPredict.json +151 -0
- teradataml/data/jsons/sqle/17.10/MovingAverage.json +368 -0
- teradataml/data/jsons/sqle/17.10/NGramSplitter.json +239 -0
- teradataml/data/jsons/sqle/17.10/NaiveBayesPredict.json +149 -0
- teradataml/data/jsons/sqle/17.10/NaiveBayesTextClassifierPredict.json +288 -0
- teradataml/data/jsons/sqle/17.10/Pack.json +133 -0
- teradataml/data/jsons/sqle/17.10/SVMSparsePredict.json +193 -0
- teradataml/data/jsons/sqle/17.10/Sessionize.json +105 -0
- teradataml/data/jsons/sqle/17.10/StringSimilarity.json +86 -0
- teradataml/data/jsons/sqle/17.10/TD_BinCodeFit.json +239 -0
- teradataml/data/jsons/sqle/17.10/TD_BinCodeTransform.json +70 -0
- teradataml/data/jsons/sqle/17.10/TD_CategoricalSummary.json +54 -0
- teradataml/data/jsons/sqle/17.10/TD_Chisq.json +68 -0
- teradataml/data/jsons/sqle/17.10/TD_ColumnSummary.json +54 -0
- teradataml/data/jsons/sqle/17.10/TD_ConvertTo.json +69 -0
- teradataml/data/jsons/sqle/17.10/TD_FTest.json +187 -0
- teradataml/data/jsons/sqle/17.10/TD_FillRowID.json +52 -0
- teradataml/data/jsons/sqle/17.10/TD_FunctionFit.json +46 -0
- teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +72 -0
- teradataml/data/jsons/sqle/17.10/TD_GetRowsWithMissingValues.json +53 -0
- teradataml/data/jsons/sqle/17.10/TD_GetRowsWithoutMissingValues.json +53 -0
- teradataml/data/jsons/sqle/17.10/TD_Histogram.json +133 -0
- teradataml/data/jsons/sqle/17.10/TD_NumApply.json +147 -0
- teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingFit.json +183 -0
- teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +66 -0
- teradataml/data/jsons/sqle/17.10/TD_OutlierFilterFit.json +197 -0
- teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +48 -0
- teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesFit.json +114 -0
- teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +72 -0
- teradataml/data/jsons/sqle/17.10/TD_QQNorm.json +112 -0
- teradataml/data/jsons/sqle/17.10/TD_RoundColumns.json +93 -0
- teradataml/data/jsons/sqle/17.10/TD_RowNormalizeFit.json +128 -0
- teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +71 -0
- teradataml/data/jsons/sqle/17.10/TD_ScaleFit.json +157 -0
- teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +71 -0
- teradataml/data/jsons/sqle/17.10/TD_SimpleImputeFit.json +148 -0
- teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +48 -0
- teradataml/data/jsons/sqle/17.10/TD_StrApply.json +240 -0
- teradataml/data/jsons/sqle/17.10/TD_UnivariateStatistics.json +119 -0
- teradataml/data/jsons/sqle/17.10/TD_WhichMax.json +53 -0
- teradataml/data/jsons/sqle/17.10/TD_WhichMin.json +53 -0
- teradataml/data/jsons/sqle/17.10/TD_ZTest.json +171 -0
- teradataml/data/jsons/sqle/17.10/Unpack.json +188 -0
- teradataml/data/jsons/sqle/17.10/nPath.json +269 -0
- teradataml/data/jsons/sqle/17.20/Antiselect.json +56 -0
- teradataml/data/jsons/sqle/17.20/Attribution.json +249 -0
- teradataml/data/jsons/sqle/17.20/DecisionForestPredict.json +185 -0
- teradataml/data/jsons/sqle/17.20/DecisionTreePredict.json +172 -0
- teradataml/data/jsons/sqle/17.20/GLMPredict.json +151 -0
- teradataml/data/jsons/sqle/17.20/MovingAverage.json +367 -0
- teradataml/data/jsons/sqle/17.20/NGramSplitter.json +239 -0
- teradataml/data/jsons/sqle/17.20/NaiveBayesPredict.json +149 -0
- teradataml/data/jsons/sqle/17.20/NaiveBayesTextClassifierPredict.json +287 -0
- teradataml/data/jsons/sqle/17.20/Pack.json +133 -0
- teradataml/data/jsons/sqle/17.20/SVMSparsePredict.json +192 -0
- teradataml/data/jsons/sqle/17.20/Sessionize.json +105 -0
- teradataml/data/jsons/sqle/17.20/StringSimilarity.json +86 -0
- teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +149 -0
- teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
- teradataml/data/jsons/sqle/17.20/TD_BinCodeFit.json +239 -0
- teradataml/data/jsons/sqle/17.20/TD_BinCodeTransform.json +71 -0
- teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
- teradataml/data/jsons/sqle/17.20/TD_CategoricalSummary.json +53 -0
- teradataml/data/jsons/sqle/17.20/TD_Chisq.json +68 -0
- teradataml/data/jsons/sqle/17.20/TD_ClassificationEvaluator.json +146 -0
- teradataml/data/jsons/sqle/17.20/TD_ColumnSummary.json +53 -0
- teradataml/data/jsons/sqle/17.20/TD_ColumnTransformer.json +218 -0
- teradataml/data/jsons/sqle/17.20/TD_ConvertTo.json +92 -0
- teradataml/data/jsons/sqle/17.20/TD_DecisionForest.json +260 -0
- teradataml/data/jsons/sqle/17.20/TD_DecisionForestPredict.json +139 -0
- teradataml/data/jsons/sqle/17.20/TD_FTest.json +269 -0
- teradataml/data/jsons/sqle/17.20/TD_FillRowID.json +52 -0
- teradataml/data/jsons/sqle/17.20/TD_FunctionFit.json +46 -0
- teradataml/data/jsons/sqle/17.20/TD_FunctionTransform.json +72 -0
- teradataml/data/jsons/sqle/17.20/TD_GLM.json +507 -0
- teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +168 -0
- teradataml/data/jsons/sqle/17.20/TD_GLMPerSegment.json +411 -0
- teradataml/data/jsons/sqle/17.20/TD_GLMPredictPerSegment.json +146 -0
- teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +93 -0
- teradataml/data/jsons/sqle/17.20/TD_GetRowsWithMissingValues.json +76 -0
- teradataml/data/jsons/sqle/17.20/TD_GetRowsWithoutMissingValues.json +76 -0
- teradataml/data/jsons/sqle/17.20/TD_Histogram.json +152 -0
- teradataml/data/jsons/sqle/17.20/TD_KMeans.json +232 -0
- teradataml/data/jsons/sqle/17.20/TD_KMeansPredict.json +87 -0
- teradataml/data/jsons/sqle/17.20/TD_KNN.json +262 -0
- teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesTextClassifierTrainer.json +137 -0
- teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +102 -0
- teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineTransform.json +71 -0
- teradataml/data/jsons/sqle/17.20/TD_NumApply.json +147 -0
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +316 -0
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVMPredict.json +124 -0
- teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingFit.json +271 -0
- teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingTransform.json +65 -0
- teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingFit.json +229 -0
- teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingTransform.json +75 -0
- teradataml/data/jsons/sqle/17.20/TD_OutlierFilterFit.json +217 -0
- teradataml/data/jsons/sqle/17.20/TD_OutlierFilterTransform.json +48 -0
- teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
- teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesFit.json +114 -0
- teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesTransform.json +72 -0
- teradataml/data/jsons/sqle/17.20/TD_QQNorm.json +111 -0
- teradataml/data/jsons/sqle/17.20/TD_ROC.json +179 -0
- teradataml/data/jsons/sqle/17.20/TD_RandomProjectionFit.json +179 -0
- teradataml/data/jsons/sqle/17.20/TD_RandomProjectionMinComponents.json +74 -0
- teradataml/data/jsons/sqle/17.20/TD_RandomProjectionTransform.json +74 -0
- teradataml/data/jsons/sqle/17.20/TD_RegressionEvaluator.json +138 -0
- teradataml/data/jsons/sqle/17.20/TD_RoundColumns.json +93 -0
- teradataml/data/jsons/sqle/17.20/TD_RowNormalizeFit.json +128 -0
- teradataml/data/jsons/sqle/17.20/TD_RowNormalizeTransform.json +71 -0
- teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
- teradataml/data/jsons/sqle/17.20/TD_SVM.json +389 -0
- teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +142 -0
- teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +310 -0
- teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +120 -0
- teradataml/data/jsons/sqle/17.20/TD_SentimentExtractor.json +194 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +221 -0
- teradataml/data/jsons/sqle/17.20/TD_Silhouette.json +143 -0
- teradataml/data/jsons/sqle/17.20/TD_SimpleImputeFit.json +147 -0
- teradataml/data/jsons/sqle/17.20/TD_SimpleImputeTransform.json +48 -0
- teradataml/data/jsons/sqle/17.20/TD_StrApply.json +240 -0
- teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
- teradataml/data/jsons/sqle/17.20/TD_TargetEncodingFit.json +248 -0
- teradataml/data/jsons/sqle/17.20/TD_TargetEncodingTransform.json +75 -0
- teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +297 -0
- teradataml/data/jsons/sqle/17.20/TD_TrainTestSplit.json +142 -0
- teradataml/data/jsons/sqle/17.20/TD_UnivariateStatistics.json +117 -0
- teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
- teradataml/data/jsons/sqle/17.20/TD_VectorDistance.json +183 -0
- teradataml/data/jsons/sqle/17.20/TD_WhichMax.json +53 -0
- teradataml/data/jsons/sqle/17.20/TD_WhichMin.json +53 -0
- teradataml/data/jsons/sqle/17.20/TD_WordEmbeddings.json +241 -0
- teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +330 -0
- teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +195 -0
- teradataml/data/jsons/sqle/17.20/TD_ZTest.json +247 -0
- teradataml/data/jsons/sqle/17.20/Unpack.json +188 -0
- teradataml/data/jsons/sqle/17.20/nPath.json +269 -0
- teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +370 -0
- teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +460 -0
- teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +385 -0
- teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +369 -0
- teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +369 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +369 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +369 -0
- teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +400 -0
- teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +401 -0
- teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +384 -0
- teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +384 -0
- teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
- teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
- teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
- teradataml/data/jsons/tableoperator/17.00/read_nos.json +198 -0
- teradataml/data/jsons/tableoperator/17.05/read_nos.json +198 -0
- teradataml/data/jsons/tableoperator/17.05/write_nos.json +195 -0
- teradataml/data/jsons/tableoperator/17.10/read_nos.json +184 -0
- teradataml/data/jsons/tableoperator/17.10/write_nos.json +195 -0
- teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
- teradataml/data/jsons/tableoperator/17.20/read_nos.json +183 -0
- teradataml/data/jsons/tableoperator/17.20/write_nos.json +224 -0
- teradataml/data/jsons/uaf/17.20/TD_ACF.json +132 -0
- teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +396 -0
- teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +77 -0
- teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +153 -0
- teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
- teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +107 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +106 -0
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +89 -0
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +104 -0
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +78 -0
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +66 -0
- teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +87 -0
- teradataml/data/jsons/uaf/17.20/TD_DFFT.json +134 -0
- teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +144 -0
- teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +108 -0
- teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +108 -0
- teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +78 -0
- teradataml/data/jsons/uaf/17.20/TD_DIFF.json +92 -0
- teradataml/data/jsons/uaf/17.20/TD_DTW.json +114 -0
- teradataml/data/jsons/uaf/17.20/TD_DURBIN_WATSON.json +101 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
- teradataml/data/jsons/uaf/17.20/TD_EXTRACT_RESULTS.json +39 -0
- teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +101 -0
- teradataml/data/jsons/uaf/17.20/TD_GENSERIES4FORMULA.json +85 -0
- teradataml/data/jsons/uaf/17.20/TD_GENSERIES4SINUSOIDS.json +71 -0
- teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +139 -0
- teradataml/data/jsons/uaf/17.20/TD_HOLT_WINTERS_FORECASTER.json +313 -0
- teradataml/data/jsons/uaf/17.20/TD_IDFFT.json +58 -0
- teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +81 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
- teradataml/data/jsons/uaf/17.20/TD_INPUTVALIDATOR.json +64 -0
- teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
- teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +182 -0
- teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +103 -0
- teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +181 -0
- teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
- teradataml/data/jsons/uaf/17.20/TD_MATRIXMULTIPLY.json +68 -0
- teradataml/data/jsons/uaf/17.20/TD_MINFO.json +67 -0
- teradataml/data/jsons/uaf/17.20/TD_MULTIVAR_REGR.json +179 -0
- teradataml/data/jsons/uaf/17.20/TD_PACF.json +114 -0
- teradataml/data/jsons/uaf/17.20/TD_PORTMAN.json +119 -0
- teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +175 -0
- teradataml/data/jsons/uaf/17.20/TD_POWERTRANSFORM.json +98 -0
- teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +194 -0
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
- teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +143 -0
- teradataml/data/jsons/uaf/17.20/TD_SELECTION_CRITERIA.json +90 -0
- teradataml/data/jsons/uaf/17.20/TD_SIGNIF_PERIODICITIES.json +80 -0
- teradataml/data/jsons/uaf/17.20/TD_SIGNIF_RESIDMEAN.json +68 -0
- teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +184 -0
- teradataml/data/jsons/uaf/17.20/TD_SINFO.json +58 -0
- teradataml/data/jsons/uaf/17.20/TD_SMOOTHMA.json +163 -0
- teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +101 -0
- teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +112 -0
- teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +95 -0
- teradataml/data/jsons/uaf/17.20/TD_WHITES_GENERAL.json +78 -0
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
- teradataml/data/kmeans_example.json +23 -0
- teradataml/data/kmeans_table.csv +10 -0
- teradataml/data/kmeans_us_arrests_data.csv +51 -0
- teradataml/data/knn_example.json +19 -0
- teradataml/data/knnrecommender_example.json +7 -0
- teradataml/data/knnrecommenderpredict_example.json +12 -0
- teradataml/data/lar_example.json +17 -0
- teradataml/data/larpredict_example.json +30 -0
- teradataml/data/lc_new_predictors.csv +5 -0
- teradataml/data/lc_new_reference.csv +9 -0
- teradataml/data/lda_example.json +9 -0
- teradataml/data/ldainference_example.json +15 -0
- teradataml/data/ldatopicsummary_example.json +9 -0
- teradataml/data/levendist_input.csv +13 -0
- teradataml/data/levenshteindistance_example.json +10 -0
- teradataml/data/linreg_example.json +10 -0
- teradataml/data/load_example_data.py +350 -0
- teradataml/data/loan_prediction.csv +295 -0
- teradataml/data/lungcancer.csv +138 -0
- teradataml/data/mappingdata.csv +12 -0
- teradataml/data/medical_readings.csv +101 -0
- teradataml/data/milk_timeseries.csv +157 -0
- teradataml/data/min_max_titanic.csv +4 -0
- teradataml/data/minhash_example.json +6 -0
- teradataml/data/ml_ratings.csv +7547 -0
- teradataml/data/ml_ratings_10.csv +2445 -0
- teradataml/data/mobile_data.csv +13 -0
- teradataml/data/model1_table.csv +5 -0
- teradataml/data/model2_table.csv +5 -0
- teradataml/data/models/License_file.txt +1 -0
- teradataml/data/models/License_file_empty.txt +0 -0
- teradataml/data/models/dataiku_iris_data_ann_thin +0 -0
- teradataml/data/models/dr_iris_rf +0 -0
- teradataml/data/models/iris_db_dt_model_sklearn.onnx +0 -0
- teradataml/data/models/iris_db_dt_model_sklearn_floattensor.onnx +0 -0
- teradataml/data/models/iris_db_glm_model.pmml +57 -0
- teradataml/data/models/iris_db_xgb_model.pmml +4471 -0
- teradataml/data/models/iris_kmeans_model +0 -0
- teradataml/data/models/iris_mojo_glm_h2o_model +0 -0
- teradataml/data/models/iris_mojo_xgb_h2o_model +0 -0
- teradataml/data/modularity_example.json +12 -0
- teradataml/data/movavg_example.json +8 -0
- teradataml/data/mtx1.csv +7 -0
- teradataml/data/mtx2.csv +13 -0
- teradataml/data/multi_model_classification.csv +401 -0
- teradataml/data/multi_model_regression.csv +401 -0
- teradataml/data/mvdfft8.csv +9 -0
- teradataml/data/naivebayes_example.json +10 -0
- teradataml/data/naivebayespredict_example.json +19 -0
- teradataml/data/naivebayestextclassifier2_example.json +7 -0
- teradataml/data/naivebayestextclassifier_example.json +8 -0
- teradataml/data/naivebayestextclassifierpredict_example.json +32 -0
- teradataml/data/name_Find_configure.csv +10 -0
- teradataml/data/namedentityfinder_example.json +14 -0
- teradataml/data/namedentityfinderevaluator_example.json +10 -0
- teradataml/data/namedentityfindertrainer_example.json +6 -0
- teradataml/data/nb_iris_input_test.csv +31 -0
- teradataml/data/nb_iris_input_train.csv +121 -0
- teradataml/data/nbp_iris_model.csv +13 -0
- teradataml/data/ner_dict.csv +8 -0
- teradataml/data/ner_extractor_text.csv +2 -0
- teradataml/data/ner_input_eng.csv +7 -0
- teradataml/data/ner_rule.csv +5 -0
- teradataml/data/ner_sports_test2.csv +29 -0
- teradataml/data/ner_sports_train.csv +501 -0
- teradataml/data/nerevaluator_example.json +6 -0
- teradataml/data/nerextractor_example.json +18 -0
- teradataml/data/nermem_sports_test.csv +18 -0
- teradataml/data/nermem_sports_train.csv +51 -0
- teradataml/data/nertrainer_example.json +7 -0
- teradataml/data/ngrams_example.json +7 -0
- teradataml/data/notebooks/__init__.py +0 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Aggregate Functions using SQLAlchemy.ipynb +1455 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Arithmetic Functions Using SQLAlchemy.ipynb +1993 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Bit-Byte Manipulation Functions using SQLAlchemy.ipynb +1492 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Built-in functions using SQLAlchemy.ipynb +536 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Regular Expressions Using SQLAlchemy.ipynb +570 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage String Functions Using SQLAlchemy.ipynb +2559 -0
- teradataml/data/notebooks/sqlalchemy/Teradata Vantage Window Aggregate Functions using SQLAlchemy.ipynb +2911 -0
- teradataml/data/notebooks/sqlalchemy/Using Generic SQLAlchemy ClauseElements teradataml DataFrame assign method.ipynb +698 -0
- teradataml/data/notebooks/sqlalchemy/__init__.py +0 -0
- teradataml/data/notebooks/sqlalchemy/teradataml filtering using SQLAlchemy ClauseElements.ipynb +784 -0
- teradataml/data/npath_example.json +23 -0
- teradataml/data/ntree_example.json +14 -0
- teradataml/data/numeric_strings.csv +5 -0
- teradataml/data/numerics.csv +4 -0
- teradataml/data/ocean_buoy.csv +17 -0
- teradataml/data/ocean_buoy2.csv +17 -0
- teradataml/data/ocean_buoys.csv +28 -0
- teradataml/data/ocean_buoys2.csv +10 -0
- teradataml/data/ocean_buoys_nonpti.csv +28 -0
- teradataml/data/ocean_buoys_seq.csv +29 -0
- teradataml/data/onehot_encoder_train.csv +4 -0
- teradataml/data/openml_example.json +92 -0
- teradataml/data/optional_event_table.csv +4 -0
- teradataml/data/orders1.csv +11 -0
- teradataml/data/orders1_12.csv +13 -0
- teradataml/data/orders_ex.csv +4 -0
- teradataml/data/pack_example.json +9 -0
- teradataml/data/package_tracking.csv +19 -0
- teradataml/data/package_tracking_pti.csv +19 -0
- teradataml/data/pagerank_example.json +13 -0
- teradataml/data/paragraphs_input.csv +6 -0
- teradataml/data/pathanalyzer_example.json +8 -0
- teradataml/data/pathgenerator_example.json +8 -0
- teradataml/data/patient_profile.csv +101 -0
- teradataml/data/pattern_matching_data.csv +11 -0
- teradataml/data/payment_fraud_dataset.csv +10001 -0
- teradataml/data/peppers.png +0 -0
- teradataml/data/phrases.csv +7 -0
- teradataml/data/pivot_example.json +9 -0
- teradataml/data/pivot_input.csv +22 -0
- teradataml/data/playerRating.csv +31 -0
- teradataml/data/pos_input.csv +40 -0
- teradataml/data/postagger_example.json +7 -0
- teradataml/data/posttagger_output.csv +44 -0
- teradataml/data/production_data.csv +17 -0
- teradataml/data/production_data2.csv +7 -0
- teradataml/data/randomsample_example.json +32 -0
- teradataml/data/randomwalksample_example.json +9 -0
- teradataml/data/rank_table.csv +6 -0
- teradataml/data/real_values.csv +14 -0
- teradataml/data/ref_mobile_data.csv +4 -0
- teradataml/data/ref_mobile_data_dense.csv +2 -0
- teradataml/data/ref_url.csv +17 -0
- teradataml/data/restaurant_reviews.csv +7 -0
- teradataml/data/retail_churn_table.csv +27772 -0
- teradataml/data/river_data.csv +145 -0
- teradataml/data/roc_example.json +8 -0
- teradataml/data/roc_input.csv +101 -0
- teradataml/data/rule_inputs.csv +6 -0
- teradataml/data/rule_table.csv +2 -0
- teradataml/data/sales.csv +7 -0
- teradataml/data/sales_transaction.csv +501 -0
- teradataml/data/salesdata.csv +342 -0
- teradataml/data/sample_cities.csv +3 -0
- teradataml/data/sample_shapes.csv +11 -0
- teradataml/data/sample_streets.csv +3 -0
- teradataml/data/sampling_example.json +16 -0
- teradataml/data/sax_example.json +17 -0
- teradataml/data/scale_attributes.csv +3 -0
- teradataml/data/scale_example.json +74 -0
- teradataml/data/scale_housing.csv +11 -0
- teradataml/data/scale_housing_test.csv +6 -0
- teradataml/data/scale_input_part_sparse.csv +31 -0
- teradataml/data/scale_input_partitioned.csv +16 -0
- teradataml/data/scale_input_sparse.csv +11 -0
- teradataml/data/scale_parameters.csv +3 -0
- teradataml/data/scale_stat.csv +11 -0
- teradataml/data/scalebypartition_example.json +13 -0
- teradataml/data/scalemap_example.json +13 -0
- teradataml/data/scalesummary_example.json +12 -0
- teradataml/data/score_category.csv +101 -0
- teradataml/data/score_summary.csv +4 -0
- teradataml/data/script_example.json +10 -0
- teradataml/data/scripts/deploy_script.py +84 -0
- teradataml/data/scripts/lightgbm/dataset.template +175 -0
- teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +264 -0
- teradataml/data/scripts/lightgbm/lightgbm_function.template +234 -0
- teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +177 -0
- teradataml/data/scripts/mapper.R +20 -0
- teradataml/data/scripts/mapper.py +16 -0
- teradataml/data/scripts/mapper_replace.py +16 -0
- teradataml/data/scripts/sklearn/__init__.py +0 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +205 -0
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +148 -0
- teradataml/data/scripts/sklearn/sklearn_function.template +144 -0
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +166 -0
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +161 -0
- teradataml/data/scripts/sklearn/sklearn_score.py +145 -0
- teradataml/data/scripts/sklearn/sklearn_transform.py +327 -0
- teradataml/data/sdk/modelops/modelops_spec.json +101737 -0
- teradataml/data/seeds.csv +10 -0
- teradataml/data/sentenceextractor_example.json +7 -0
- teradataml/data/sentiment_extract_input.csv +11 -0
- teradataml/data/sentiment_train.csv +16 -0
- teradataml/data/sentiment_word.csv +20 -0
- teradataml/data/sentiment_word_input.csv +20 -0
- teradataml/data/sentimentextractor_example.json +24 -0
- teradataml/data/sentimenttrainer_example.json +8 -0
- teradataml/data/sequence_table.csv +10 -0
- teradataml/data/seriessplitter_example.json +8 -0
- teradataml/data/sessionize_example.json +17 -0
- teradataml/data/sessionize_table.csv +116 -0
- teradataml/data/setop_test1.csv +24 -0
- teradataml/data/setop_test2.csv +22 -0
- teradataml/data/soc_nw_edges.csv +11 -0
- teradataml/data/soc_nw_vertices.csv +8 -0
- teradataml/data/souvenir_timeseries.csv +168 -0
- teradataml/data/sparse_iris_attribute.csv +5 -0
- teradataml/data/sparse_iris_test.csv +121 -0
- teradataml/data/sparse_iris_train.csv +601 -0
- teradataml/data/star1.csv +6 -0
- teradataml/data/star_pivot.csv +8 -0
- teradataml/data/state_transition.csv +5 -0
- teradataml/data/stock_data.csv +53 -0
- teradataml/data/stock_movement.csv +11 -0
- teradataml/data/stock_vol.csv +76 -0
- teradataml/data/stop_words.csv +8 -0
- teradataml/data/store_sales.csv +37 -0
- teradataml/data/stringsimilarity_example.json +8 -0
- teradataml/data/strsimilarity_input.csv +13 -0
- teradataml/data/students.csv +101 -0
- teradataml/data/svm_iris_input_test.csv +121 -0
- teradataml/data/svm_iris_input_train.csv +481 -0
- teradataml/data/svm_iris_model.csv +7 -0
- teradataml/data/svmdense_example.json +10 -0
- teradataml/data/svmdensepredict_example.json +19 -0
- teradataml/data/svmsparse_example.json +8 -0
- teradataml/data/svmsparsepredict_example.json +14 -0
- teradataml/data/svmsparsesummary_example.json +8 -0
- teradataml/data/target_mobile_data.csv +13 -0
- teradataml/data/target_mobile_data_dense.csv +5 -0
- teradataml/data/target_udt_data.csv +8 -0
- teradataml/data/tdnerextractor_example.json +14 -0
- teradataml/data/templatedata.csv +1201 -0
- teradataml/data/templates/open_source_ml.json +11 -0
- teradataml/data/teradata_icon.ico +0 -0
- teradataml/data/teradataml_example.json +1473 -0
- teradataml/data/test_classification.csv +101 -0
- teradataml/data/test_loan_prediction.csv +53 -0
- teradataml/data/test_pacf_12.csv +37 -0
- teradataml/data/test_prediction.csv +101 -0
- teradataml/data/test_regression.csv +101 -0
- teradataml/data/test_river2.csv +109 -0
- teradataml/data/text_inputs.csv +6 -0
- teradataml/data/textchunker_example.json +8 -0
- teradataml/data/textclassifier_example.json +7 -0
- teradataml/data/textclassifier_input.csv +7 -0
- teradataml/data/textclassifiertrainer_example.json +7 -0
- teradataml/data/textmorph_example.json +11 -0
- teradataml/data/textparser_example.json +15 -0
- teradataml/data/texttagger_example.json +12 -0
- teradataml/data/texttokenizer_example.json +7 -0
- teradataml/data/texttrainer_input.csv +11 -0
- teradataml/data/tf_example.json +7 -0
- teradataml/data/tfidf_example.json +14 -0
- teradataml/data/tfidf_input1.csv +201 -0
- teradataml/data/tfidf_train.csv +6 -0
- teradataml/data/time_table1.csv +535 -0
- teradataml/data/time_table2.csv +14 -0
- teradataml/data/timeseriesdata.csv +1601 -0
- teradataml/data/timeseriesdatasetsd4.csv +105 -0
- teradataml/data/timestamp_data.csv +4 -0
- teradataml/data/titanic.csv +892 -0
- teradataml/data/titanic_dataset_unpivoted.csv +19 -0
- teradataml/data/to_num_data.csv +4 -0
- teradataml/data/tochar_data.csv +5 -0
- teradataml/data/token_table.csv +696 -0
- teradataml/data/train_multiclass.csv +101 -0
- teradataml/data/train_regression.csv +101 -0
- teradataml/data/train_regression_multiple_labels.csv +101 -0
- teradataml/data/train_tracking.csv +28 -0
- teradataml/data/trans_dense.csv +16 -0
- teradataml/data/trans_sparse.csv +55 -0
- teradataml/data/transformation_table.csv +6 -0
- teradataml/data/transformation_table_new.csv +2 -0
- teradataml/data/tv_spots.csv +16 -0
- teradataml/data/twod_climate_data.csv +117 -0
- teradataml/data/uaf_example.json +529 -0
- teradataml/data/univariatestatistics_example.json +9 -0
- teradataml/data/unpack_example.json +10 -0
- teradataml/data/unpivot_example.json +25 -0
- teradataml/data/unpivot_input.csv +8 -0
- teradataml/data/url_data.csv +10 -0
- teradataml/data/us_air_pass.csv +37 -0
- teradataml/data/us_population.csv +624 -0
- teradataml/data/us_states_shapes.csv +52 -0
- teradataml/data/varmax_example.json +18 -0
- teradataml/data/vectordistance_example.json +30 -0
- teradataml/data/ville_climatedata.csv +121 -0
- teradataml/data/ville_tempdata.csv +12 -0
- teradataml/data/ville_tempdata1.csv +12 -0
- teradataml/data/ville_temperature.csv +11 -0
- teradataml/data/waveletTable.csv +1605 -0
- teradataml/data/waveletTable2.csv +1605 -0
- teradataml/data/weightedmovavg_example.json +9 -0
- teradataml/data/wft_testing.csv +5 -0
- teradataml/data/windowdfft.csv +16 -0
- teradataml/data/wine_data.csv +1600 -0
- teradataml/data/word_embed_input_table1.csv +6 -0
- teradataml/data/word_embed_input_table2.csv +5 -0
- teradataml/data/word_embed_model.csv +23 -0
- teradataml/data/words_input.csv +13 -0
- teradataml/data/xconvolve_complex_left.csv +6 -0
- teradataml/data/xconvolve_complex_leftmulti.csv +6 -0
- teradataml/data/xgboost_example.json +36 -0
- teradataml/data/xgboostpredict_example.json +32 -0
- teradataml/data/ztest_example.json +16 -0
- teradataml/dataframe/__init__.py +0 -0
- teradataml/dataframe/copy_to.py +2446 -0
- teradataml/dataframe/data_transfer.py +2840 -0
- teradataml/dataframe/dataframe.py +20908 -0
- teradataml/dataframe/dataframe_utils.py +2114 -0
- teradataml/dataframe/fastload.py +794 -0
- teradataml/dataframe/functions.py +2110 -0
- teradataml/dataframe/indexer.py +424 -0
- teradataml/dataframe/row.py +160 -0
- teradataml/dataframe/setop.py +1171 -0
- teradataml/dataframe/sql.py +10904 -0
- teradataml/dataframe/sql_function_parameters.py +440 -0
- teradataml/dataframe/sql_functions.py +652 -0
- teradataml/dataframe/sql_interfaces.py +220 -0
- teradataml/dataframe/vantage_function_types.py +675 -0
- teradataml/dataframe/window.py +694 -0
- teradataml/dbutils/__init__.py +3 -0
- teradataml/dbutils/dbutils.py +2871 -0
- teradataml/dbutils/filemgr.py +318 -0
- teradataml/gen_ai/__init__.py +2 -0
- teradataml/gen_ai/convAI.py +473 -0
- teradataml/geospatial/__init__.py +4 -0
- teradataml/geospatial/geodataframe.py +1105 -0
- teradataml/geospatial/geodataframecolumn.py +392 -0
- teradataml/geospatial/geometry_types.py +926 -0
- teradataml/hyperparameter_tuner/__init__.py +1 -0
- teradataml/hyperparameter_tuner/optimizer.py +4115 -0
- teradataml/hyperparameter_tuner/utils.py +303 -0
- teradataml/lib/__init__.py +0 -0
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/lib/libaed_0_1.dylib +0 -0
- teradataml/lib/libaed_0_1.so +0 -0
- teradataml/lib/libaed_0_1_aarch64.so +0 -0
- teradataml/lib/libaed_0_1_ppc64le.so +0 -0
- teradataml/opensource/__init__.py +1 -0
- teradataml/opensource/_base.py +1321 -0
- teradataml/opensource/_class.py +464 -0
- teradataml/opensource/_constants.py +61 -0
- teradataml/opensource/_lightgbm.py +949 -0
- teradataml/opensource/_sklearn.py +1008 -0
- teradataml/opensource/_wrapper_utils.py +267 -0
- teradataml/options/__init__.py +148 -0
- teradataml/options/configure.py +489 -0
- teradataml/options/display.py +187 -0
- teradataml/plot/__init__.py +3 -0
- teradataml/plot/axis.py +1427 -0
- teradataml/plot/constants.py +15 -0
- teradataml/plot/figure.py +431 -0
- teradataml/plot/plot.py +810 -0
- teradataml/plot/query_generator.py +83 -0
- teradataml/plot/subplot.py +216 -0
- teradataml/scriptmgmt/UserEnv.py +4273 -0
- teradataml/scriptmgmt/__init__.py +3 -0
- teradataml/scriptmgmt/lls_utils.py +2157 -0
- teradataml/sdk/README.md +79 -0
- teradataml/sdk/__init__.py +4 -0
- teradataml/sdk/_auth_modes.py +422 -0
- teradataml/sdk/_func_params.py +487 -0
- teradataml/sdk/_json_parser.py +453 -0
- teradataml/sdk/_openapi_spec_constants.py +249 -0
- teradataml/sdk/_utils.py +236 -0
- teradataml/sdk/api_client.py +900 -0
- teradataml/sdk/constants.py +62 -0
- teradataml/sdk/modelops/__init__.py +98 -0
- teradataml/sdk/modelops/_client.py +409 -0
- teradataml/sdk/modelops/_constants.py +304 -0
- teradataml/sdk/modelops/models.py +2308 -0
- teradataml/sdk/spinner.py +107 -0
- teradataml/series/__init__.py +0 -0
- teradataml/series/series.py +537 -0
- teradataml/series/series_utils.py +71 -0
- teradataml/store/__init__.py +12 -0
- teradataml/store/feature_store/__init__.py +0 -0
- teradataml/store/feature_store/constants.py +658 -0
- teradataml/store/feature_store/feature_store.py +4814 -0
- teradataml/store/feature_store/mind_map.py +639 -0
- teradataml/store/feature_store/models.py +7330 -0
- teradataml/store/feature_store/utils.py +390 -0
- teradataml/table_operators/Apply.py +979 -0
- teradataml/table_operators/Script.py +1739 -0
- teradataml/table_operators/TableOperator.py +1343 -0
- teradataml/table_operators/__init__.py +2 -0
- teradataml/table_operators/apply_query_generator.py +262 -0
- teradataml/table_operators/query_generator.py +493 -0
- teradataml/table_operators/table_operator_query_generator.py +462 -0
- teradataml/table_operators/table_operator_util.py +726 -0
- teradataml/table_operators/templates/dataframe_apply.template +184 -0
- teradataml/table_operators/templates/dataframe_map.template +176 -0
- teradataml/table_operators/templates/dataframe_register.template +73 -0
- teradataml/table_operators/templates/dataframe_udf.template +67 -0
- teradataml/table_operators/templates/script_executor.template +170 -0
- teradataml/telemetry_utils/__init__.py +0 -0
- teradataml/telemetry_utils/queryband.py +53 -0
- teradataml/utils/__init__.py +0 -0
- teradataml/utils/docstring.py +527 -0
- teradataml/utils/dtypes.py +943 -0
- teradataml/utils/internal_buffer.py +122 -0
- teradataml/utils/print_versions.py +206 -0
- teradataml/utils/utils.py +451 -0
- teradataml/utils/validators.py +3305 -0
- teradataml-20.0.0.8.dist-info/METADATA +2804 -0
- teradataml-20.0.0.8.dist-info/RECORD +1208 -0
- teradataml-20.0.0.8.dist-info/WHEEL +5 -0
- teradataml-20.0.0.8.dist-info/top_level.txt +1 -0
- teradataml-20.0.0.8.dist-info/zip-safe +1 -0
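
The largest addition in this release is the Enterprise Feature Store module, teradataml/store/feature_store/feature_store.py, which the hunk below appears to add (+4814 lines). As a quick orientation before reading that diff, the following sketch strings together calls documented in the module's own docstrings (create_context, FeatureStore, FeatureGroup.from_DataFrame, apply, list_features). It is an illustrative example assembled by the editor, not part of the packaged sources; host and credential values are placeholders.

```python
from teradataml import (create_context, remove_context, DataFrame,
                        FeatureStore, FeatureGroup, load_example_data)

# Connect to Vantage first; host and credentials below are placeholders.
create_context(host="<host>", username="<user>", password="<password>")

# Create (or reuse) a FeatureStore repository and set up its DB objects.
fs = FeatureStore(repo="vfs_v1")
fs.setup()

# Register a feature group built from a teradataml DataFrame,
# following the EXAMPLES sections in the docstrings shown in the diff.
load_example_data("dataframe", "sales")
df = DataFrame("sales")
fg = FeatureGroup.from_DataFrame(name="sales",
                                 entity_columns="accounts",
                                 df=df,
                                 timestamp_column="datetime")
fs.apply(fg)                 # persist the feature group in the repository
print(fs.list_features())    # list the effective features

remove_context()
```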
@@ -0,0 +1,4814 @@
+"""
+Copyright (c) 2024 by Teradata Corporation. All rights reserved.
+TERADATA CORPORATION CONFIDENTIAL AND TRADE SECRET
+
+Primary Owner: pradeep.garre@teradata.com
+Secondary Owner: adithya.avvaru@teradata.com
+
+This file implements the core framework that allows user to use Teradata Enterprise Feature Store.
+"""
+import os.path
+import operator
+import random
+from functools import reduce
+from sqlalchemy import literal_column
+from teradataml.context.context import get_connection, _get_current_databasename
+from teradataml.common.constants import SQLConstants, AccessQueries
+from teradataml.common.exceptions import TeradataMlException
+from teradataml.common.messages import Messages
+from teradataml.common.messagecodes import MessageCodes
+from teradataml.dataframe.sql import _SQLColumnExpression as Col
+from teradataml.dbutils.dbutils import _create_database, _create_table, db_drop_table, execute_sql, Grant, Revoke, \
+    _update_data, _delete_data, db_transaction, db_list_tables, _insert_data, \
+    _is_trigger_exist, db_drop_view, _get_quoted_object_name
+from teradataml.store.feature_store.constants import *
+from teradataml.store.feature_store.mind_map import _TD_FS_MindMap_Template
+from teradataml.store.feature_store.models import *
+from teradataml.store.feature_store.constants import _FeatureStoreDFContainer
+from teradataml.common.sqlbundle import SQLBundle
+from teradataml.utils.validators import _Validators
+from teradataml.store.feature_store.utils import _FSUtils
+from teradataml.common.logger import TeradataMlLogger, get_td_logger
+
+@TeradataMlLogger
+class FeatureStore:
+    """Class for FeatureStore."""
+
+    def __init__(self,
+                 repo,
+                 data_domain=None,
+                 check=True):
+        """
+        DESCRIPTION:
+            Method to create FeatureStore in teradataml.
+            Note:
+                * One should establish a connection to Vantage using create_context()
+                  before creating a FeatureStore object.
+
+        PARAMETERS:
+            repo:
+                Required Argument.
+                Specifies the repository name.
+                Types: str.
+
+            data_domain:
+                Optional Argument.
+                Specifies the data domain to which FeatureStore points to.
+                Note:
+                    * If not specified, then default database name is considered as data domain.
+                Types: str
+
+            check:
+                Optional Argument.
+                Specifies whether to check the existence of the Feature store DB objects or not.
+                When set to True, the method checks for the existence of Feature store DB objects.
+                Otherwise, the method does not verify the existence of Feature store DB objects.
+                Default Value: True
+                Types: bool
+
+        RETURNS:
+            Object of FeatureStore.
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            # Example 1: Create an instance of FeatureStore for repository 'vfs_v1'.
+            >>> from teradataml import FeatureStore
+            >>> fs = FeatureStore(repo='vfs_v1')
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+
+            >>> fs.setup()
+            True
+
+            >>> fs
+            VantageFeatureStore(abc)-v2.0.0
+        """
+        argument_validation_params = []
+        argument_validation_params.append(["repo", repo, False, (str), True])
+
+        # Validate argument types
+        _Validators._validate_function_arguments(argument_validation_params)
+
+        connection = get_connection()
+        if connection is None:
+            raise TeradataMlException(Messages.get_message(MessageCodes.CONNECTION_FAILURE),
+                                      MessageCodes.CONNECTION_FAILURE)
+        self._logger.info(f"Creating FeatureStore for repo: {repo}, data_domain: {data_domain}, check: {check}")
+
+        # Do not validate the existance of repo as it consumes a network call.
+        self.__repo = repo
+        self.__version = ""
+
+        # Declare SQLBundle to use it further.
+        self.__sql_bundle = SQLBundle()
+
+        # Store all the DF's here so no need to create these every time.
+        self.__df_container = {}
+        self._logger.debug("Initialized FeatureStore attributes and containers")
+
+        # Store the table names here. Then use this where ever required.
+        self.__table_names = EFS_DB_COMPONENTS
+
+        # Declare getter's for getting the corresponding DataFrame's using _FeatureStoreDFContainer directly.
+        # Only keep the lambda functions that are actually used in the codebase
+        self.__get_features_df = lambda : _FeatureStoreDFContainer.get_df("feature", self.__repo, self.__data_domain)
+        self.__get_features_wog_df = lambda : _FeatureStoreDFContainer.get_df("feature_wog", self.__repo, self.__data_domain)
+        self.__get_archived_features_df = lambda : _FeatureStoreDFContainer.get_df("feature_staging", self.__repo, self.__data_domain)
+        self.__get_feature_group_df = lambda : _FeatureStoreDFContainer.get_df("feature_group", self.__repo, self.__data_domain)
+        self.__get_archived_feature_group_df = lambda : _FeatureStoreDFContainer.get_df("feature_group_staging", self.__repo, self.__data_domain)
+        self.__get_entity_df = lambda : _FeatureStoreDFContainer.get_df("entity", self.__repo, self.__data_domain)
+        self.__get_archived_entity_df = lambda : _FeatureStoreDFContainer.get_df("entity_staging", self.__repo, self.__data_domain)
+        self.__get_data_source_df = lambda : _FeatureStoreDFContainer.get_df("data_source", self.__repo, self.__data_domain)
+        self.__get_archived_data_source_df = lambda : _FeatureStoreDFContainer.get_df("data_source_staging", self.__repo, self.__data_domain)
+        self.__get_dataset_catalog_df = lambda : _FeatureStoreDFContainer.get_df("dataset_catalog", self.__repo, self.__data_domain)
+        self.__get_data_domain_df = lambda : _FeatureStoreDFContainer.get_df("data_domain", self.__repo, self.__data_domain)
+        self.__get_feature_process_df = lambda : _FeatureStoreDFContainer.get_df("feature_process", self.__repo, self.__data_domain)
+        self.__get_features_metadata_df = lambda : _FeatureStoreDFContainer.get_df("feature_metadata", self.__repo, self.__data_domain)
+        self.__get_feature_info_df = lambda: _FeatureStoreDFContainer.get_df("feature_info", self.__repo, self.__data_domain)
+        self.__get_dataset_features_df = lambda: _FeatureStoreDFContainer.get_df("dataset_features", self.__repo, self.__data_domain)
+        self.__get_feature_runs_df = lambda : _FeatureStoreDFContainer.get_df("feature_runs", self.__repo, self.__data_domain)
+        self.__get_without_valid_period_df = lambda df: df.drop(columns=['ValidPeriod'])
+        self.__get_feature_version = lambda: _FeatureStoreDFContainer.get_df("feature_version", self.__repo, self.__data_domain)
+
+        self.__good_status = "Good"
+        self.__bad_status = "Bad"
+        self.__repaired_status = "Repaired"
+
+        self.__data_domain = data_domain if data_domain is not None else _get_current_databasename()
+        self._logger.debug(f"Set data domain to: {self.__data_domain}")
+
+        self.__repo_exists = connection.dialect._get_database_names(connection, self.__repo)
+        self._logger.debug(f"Repository exists check: {self.__repo_exists}")
+
+        if check:
+            self._logger.info("Checking existence of FeatureStore DB objects for repo: {}, data_domain: {}".format(repo, self.__data_domain))
+            return self.__validate_repo_exists()
+        else:
+            # If check is False, then do not check for the existence of DB objects.
+            self._logger.debug("Skipping repository validation and adding data domain")
+            self.__add_data_domain()
+
+        self._logger.info(f"FeatureStore created for repo: {repo}, data_domain: {data_domain}, check: {check}")
+
+    def __validate_repo_exists(self):
+        """
+        Validate the repository.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            None
+
+        RAISES:
+            ValueError: If the repo is invalid.
+        """
+        self._logger.debug(f"Validating repository '{self.__repo}' existence and completeness")
+
+        # Check whether the repo exists or not.
+        if not self.__repo_exists:
+            self._logger.info(f"Repository '{self.__repo}' does not exist")
+            print("Repo {} does not exist. Run FeatureStore.setup() " \
+                  "to create the repo and setup FeatureStore.".format(self.__repo))
+            return
+
+        # Check whether all the EFS tables exist or not.
+        existing_tabs = db_list_tables(schema_name=self.__repo, object_name='_efs%')
+        if not existing_tabs.empty:
+            existing_tables = set(existing_tabs['TableName'].tolist())
+            all_tables_exist = all(val in existing_tables for val in EFS_TABLES.values())
+            self._logger.debug(f"Found {len(existing_tables)} EFS tables in repository, all required tables exist: {all_tables_exist}")
+        else:
+            all_tables_exist = False
+            self._logger.debug("No EFS tables found in repository")
+
+        # Check whether all the EFS triggers exist or not.
+        all_triggers_exist, num_trigger_exist = _is_trigger_exist(self.__repo, list(EFS_TRIGGERS.values()))
+        self._logger.debug(f"EFS triggers status: {num_trigger_exist} triggers exist, all required triggers exist: {all_triggers_exist}")
+
+        # Check whether all the EFS tables and triggers exist or not.
+        # If exists, then insert the data domain name into _efs_data_domain table.
+        if all_tables_exist and all_triggers_exist:
+            self._logger.info("FeatureStore repository validation successful - all objects exist")
+            self.__add_data_domain()
+            # If all the tables and triggers are available, then
+            # FeatureStore is ready to use.
+            print("FeatureStore is ready to use.")
+        # All table and triggers does not exist.
+        # If the count of tables and triggers is 0, then
+        # FeatureStore is not setup.
+        elif num_trigger_exist == 0 and len(existing_tabs) == 0:
+            self._logger.info("FeatureStore is not setup - no objects found")
+            print("FeatureStore is not setup(). Run FeatureStore.setup() to setup FeatureStore.")
+        else:
+            self._logger.info("FeatureStore repository validation failed - some objects missing")
+            print("Some of the feature store objects are missing. Run FeatureStore.repair() to create missing objects.")
+
+    @property
+    def data_domain(self):
+        """
+        DESCRIPTION:
+            Get the data domain.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            str
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            # Example 1: Use existing FeatureStore 'vfs_v1' to get the data domain.
+            >>> from teradataml import FeatureStore
+            >>> fs = FeatureStore(repo='vfs_v1', data_domain='test_domain')
+            FeatureStore is ready to use.
+            >>> fs.data_domain
+            'test_domain'
+        """
+        self._logger.debug(f"Accessing data_domain property: {self.__data_domain}")
+        return self.__data_domain
+
+    @data_domain.setter
+    def data_domain(self, value):
+        """
+        DESCRIPTION:
+            Set the data domain.
+
+        PARAMETERS:
+            value:
+                Required Argument.
+                Specifies the data domain name.
+                Types: str.
+
+        RETURNS:
+            None.
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            # Example 1: Create or use existing FeatureStore for repository 'abc' and
+            # then change the data domain to 'xyz'.
+            >>> from teradataml import FeatureStore
+            >>> fs = FeatureStore('abc')
+            FeatureStore is ready to use.
+
+            # Set the data domain to 'xyz'.
+            >>> fs.data_domain = 'xyz'
+
+            # Get the data domain.
+            >>> fs.data_domain
+            'xyz'
+        """
+        self._logger.info(f"Setting data domain from '{self.__data_domain}' to '{value}'")
+
+        argument_validation_params = []
+        argument_validation_params.append(["value", value, False, (str), True])
+
+        # Validate argument types
+        _Validators._validate_function_arguments(argument_validation_params)
+
+        # Set the data domain value.
+        self.__data_domain = value
+        self.__add_data_domain()
+        self._logger.debug(f"Data domain successfully set to: {value}")
+
+    def __add_data_domain(self):
+        """
+        DESCRIPTION:
+            Internal method to add the data domain.
+
+        PARAMETERS:
+            data_domain:
+                Required Argument.
+                Specifies the data domain name.
+                Types: str.
+
+        RETURNS:
+            None.
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            >>> self.__add_data_domain()
+        """
+        self._logger.debug(f"Adding data domain '{self.__data_domain}' to EFS metadata")
+
+        # Add the data domain to the EFS_DATA_DOMAINS table.
+        _insert_data(table_name=self.__table_names['data_domain'],
+                     schema_name=self.__repo,
+                     values=(self.__data_domain, dt.utcnow()),
+                     columns=["name", "created_time"],
+                     ignore_errors=[2801])
+
+        self._logger.debug(f"Data domain '{self.__data_domain}' added to metadata table")
+
+    @property
+    def repo(self):
+        """
+        DESCRIPTION:
+            Get the repository.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            str
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            # Example 1: Get the repository name from FeatureStore.
+            >>> from teradataml import FeatureStore
+            >>> fs = FeatureStore('vfs_v1')
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+
+            # Get the repository name.
+            >>> fs.repo
+            'vfs_v1'
+        """
+        self._logger.debug(f"Accessing repo property: {self.__repo}")
+        return self.__repo
+
+    @repo.setter
+    def repo(self, value):
+        """
+        DESCRIPTION:
+            Set the repository.
+
+        PARAMETERS:
+            value:
+                Required Argument.
+                Specifies the repository name.
+                Types: str.
+
+        RETURNS:
+            None.
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            # Example 1: Create a FeatureStore for repository 'abc' and
+            # then change the repository to 'xyz'.
+            >>> from teradataml import FeatureStore
+            >>> fs = FeatureStore('abc')
+            FeatureStore is ready to use.
+
+            # Get the repository name.
+            >>> fs.repo
+            'abc'
+
+            # Set the repository to 'xyz'.
+            >>> fs.repo = 'xyz'
+            >>> fs.repo
+            'xyz'
+        """
+        self._logger.info(f"Setting repository from '{self.__repo}' to '{value}'")
+
+        argument_validation_params = []
+        argument_validation_params.append(["value", value, False, (str), True])
+
+        # Validate argument types
+        _Validators._validate_function_arguments(argument_validation_params)
+
+        # remove all entries from container so they will be automatically
+        # point to new repo for subsequent API's.
+        self.__repo_exists = get_connection().dialect._get_database_names(get_connection(),
+                                                                          value)
+        self._logger.debug(f"Repository '{value}' exists: {self.__repo_exists}")
+
+        self.__validate_repo_exists()
+
+        self.__df_container.clear()
+        self._logger.debug("Cleared DataFrame container cache")
+
+        self.__version = None
+
+        # Set the repo value.
+        self.__repo = value
+        self._logger.debug(f"Repository successfully set to: {value}")
+
+    def __repr__(self):
+        """
+        DESCRIPTION:
+            String representation for FeatureStore object.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            str
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            >>> from teradataml import FeatureStore
+            >>> fs = FeatureStore('vfs_v1')
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+
+            # Get the string representation of FeatureStore.
+            >>> fs
+            'VantageFeatureStore(vfs_v1)-v2.0.0'
+
+        """
+        s = "VantageFeatureStore({})".format(self.__repo)
+        try:
+            version = "-v{}".format(self.__get_version())
+            self._logger.debug(f"Retrieved FeatureStore version: {version}")
+        except Exception as e:
+            self._logger.debug(f"Could not retrieve FeatureStore version: {e}")
+            version = ""
+
+        result = "{}{}".format(s, version)
+        self._logger.debug(f"Generated FeatureStore string representation: {result}")
+        return result
+
+    def __get_version(self):
+        """
+        DESCRIPTION:
+            Internal method to get the FeatureStore version.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            str
+
+        RAISES:
+            None
+        """
+        if not self.__version:
+            self._logger.debug(f"Retrieving FeatureStore version from {self.__repo}.{self.__table_names['version']}")
+            sql = "SELECT version FROM {}.{}".format(self.__repo, self.__table_names['version'])
+            self.__version = next(execute_sql(sql))[0]
+            self._logger.debug(f"Retrieved version: {self.__version}")
+        return self.__version
+
+    @staticmethod
+    def list_repos() -> DataFrame:
+        """
+        DESCRIPTION:
+            Function to list down the repositories.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            teradataml DataFrame
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            >>> from teradataml import FeatureStore
+            # Example 1: List all the FeatureStore repositories using FeatureStore class.
+            >>> FeatureStore.list_repos()
+            repos
+            0 vfs_v1
+
+            # Example 2: List all the FeatureStore repositories using FeatureStore object.
+            >>> fs = FeatureStore('vfs_v1')
+            FeatureStore is ready to use.
+
+            >>> fs.list_repos()
+            repos
+            0 vfs_v1
+
+        """
+        get_td_logger().info("Listing all the FeatureStore repositories.")
+        df = DataFrame.from_query("select distinct DataBaseName as repos from dbc.tablesV where TableName='{}'".format(
+            EFS_DB_COMPONENTS['version']))
+
+        get_td_logger().debug(f"FeatureStore repositories listed:\n{df}")
+        return df
+
+    def setup(self, perm_size='10e9', spool_size='10e8'):
+        """
+        DESCRIPTION:
+            Function to setup all the required objects in Vantage for the specified
+            repository.
+            Note:
+                The function checks whether repository exists or not. If not exists,
+                it first creates the repository and then creates the corresponding tables.
+                Hence make sure the user with which is it connected to Vantage
+                has corresponding access rights for creating DataBase and creating
+                tables in the corresponding database.
+
+        PARAMETERS:
+            perm_size:
+                Optional Argument.
+                Specifies the number of bytes to allocate to FeatureStore "repo"
+                for permanent space.
+                Note:
+                    Exponential notation can also be used.
+                Default Value: 10e9
+                Types: str or int
+
+            spool_size:
+                Optional Argument.
+                Specifies the number of bytes to allocate to FeatureStore "repo"
+                for spool space.
+                Note:
+                    Exponential notation can also be used.
+                Default Value: 10e8
+                Types: str or int
+
+        RETURNS:
+            bool
+
+        RAISES:
+            TeradatamlException
+
+        EXAMPLES:
+            # Example 1: Setup FeatureStore for repository 'vfs_v1'.
+            >>> from teradataml import FeatureStore
+            # Create FeatureStore for repo 'vfs_v1'.
+            >>> fs = FeatureStore("vfs_v1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
+            >>> fs
+            VantageFeatureStore(vfs_v1)-v2.0.0
+
+            # Example 2: Setup FeatureStore for repository 'vfs_v2' with custom perm_size and spool_size.
+            # Create FeatureStore for repo 'vfs_v2'.
+            >>> fs = FeatureStore("vfs_v2")
+            Repo vfs_v2 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+
+            # Setup FeatureStore for this repository.
+            >>> fs.setup(perm_size='50e6', spool_size='50e6')
+            True
+
+            >>> fs
+            VantageFeatureStore(vfs_v2)-v2.0.0
+
+        """
+        self._logger.info(f"Setting up FeatureStore for repository '{self.__repo}' with perm_size: {perm_size}, spool_size: {spool_size}")
+
+        # If repo does not exist, then create it.
+        if not self.__repo_exists:
+            self._logger.info(f"Creating database '{self.__repo}' as it does not exist")
+            _create_database(self.__repo, perm_size, spool_size)
+
+        # Check whether version table exists or not. If exist, assume all
+        # tables are available.
+        all_tables_exist = get_connection().dialect.has_table(
+            get_connection(), self.__table_names['version'], schema=self.__repo)
+        self._logger.debug(f"Version table exists: {all_tables_exist}")
+
+        if not all_tables_exist:
+            self._logger.info("Creating FeatureStore database objects (tables, triggers, views)")
+            # Create the object tables.
+            for table_spec, table_name in EFS_TABLES.items():
+                self._logger.debug(f"Creating table: {table_name}")
+                execute_sql(table_spec.format(self.__repo, table_name))
+            self._logger.debug("All required tables created successfully")
+            # Create the Triggers.
+            for trigger_spec, trg_name in EFS_TRIGGERS.items():
+                self._logger.debug(f"Creating trigger: {trg_name}")
+                alter_name = trg_name.split('_trg')[0]
+                insert_name = self.__repo+'.'+alter_name+'_staging'
+                execute_sql(trigger_spec.format(self.__repo, trg_name,
+                                                alter_name, insert_name))
+            self._logger.debug("All required triggers created successfully")
+
+            # Create feature versions view.
+            self._logger.debug("Creating feature versions view")
+            sql = EFS_FEATURE_VERSION.format(self.__repo,
+                                             EFS_DB_COMPONENTS['feature_version'],
+                                             self.__repo,
+                                             self.__table_names['feature_process']
+                                             )
+            execute_sql(sql)
+            self._logger.debug("Feature versions view created successfully")
+
+            # After the setup is done, populate the version.
+            self._logger.debug("Populating version table")
+            insert_model = "insert into {}.{} values (?, ?);".format(self.__repo, self.__table_names['version'])
+            execute_sql(insert_model, (EFS_VERSION_, datetime.datetime.now()))
+            self._logger.debug("Version table populated successfully")
+
+            # Create the data domain in _efs_data_domain table.
+            self.__add_data_domain()
+            self._logger.debug("FeatureStore setup process completed successfully")
+
+        if self.__repo_exists and all_tables_exist:
+            self._logger.info(f"FeatureStore is already setup for repository '{self.__repo}'")
+            print("EFS is already setup for the repo {}.".format(self.__repo))
+
+        # Set the repo_exists to True
+        self.__repo_exists = True
+        self._logger.info(f"FeatureStore setup completed successfully for repository '{self.__repo}'")
+        return True
+
+    @property
+    def grant(self):
+        """
+        DESCRIPTION:
+            Grants access on FeatureStore.
+            Note:
+                One must have admin access to grant access.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            bool
+
+        RAISES:
+            OperationalError
+
+        EXAMPLES:
+            >>> from teradataml import FeatureStore
+            # Create FeatureStore for repo 'vfs_v2'.
+            >>> fs = FeatureStore("vfs_v2")
+            Repo vfs_v2 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
+            # Example 1: Grant read access on FeatureStore to user 'BoB'.
+            >>> fs.grant.read('BoB')
+            True
+
+            # Example 2: Grant write access on FeatureStore to user 'BoB'.
+            >>> fs.grant.write('BoB')
+            True
+
+            # Example 3: Grant read and write access on FeatureStore to user 'BoB'.
+            >>> fs.grant.read_write('BoB')
+            True
+
+        """
+        self._logger.info(f"Granting access for repository: {self.__repo}")
+        return Grant(objects=AccessQueries,
+                     database=self.__repo)
+
+    @property
+    def revoke(self):
+        """
+        DESCRIPTION:
+            Revokes access on FeatureStore.
+            Note:
+                One must have admin access to revoke access.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            bool
+
+        RAISES:
+            OperationalError
+
+        EXAMPLES:
+            >>> from teradataml import FeatureStore
+            # Create FeatureStore for repo 'vfs_v1'.
+            >>> fs = FeatureStore("vfs_v1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
+            # Example 1: Revoke read access on FeatureStore from user 'BoB'.
+            >>> fs.revoke.read('BoB')
+            True
+
+            # Example 2: Revoke write access on FeatureStore from user 'BoB'.
+            >>> fs.revoke.write('BoB')
+            True
+
+            # Example 3: Revoke read and write access on FeatureStore from user 'BoB'.
+            >>> fs.revoke.read_write('BoB')
+            True
+        """
+        self._logger.info(f"Revoking access for repository: {self.__repo}")
+        return Revoke(objects=AccessQueries,
+                      database=self.__repo)
+
+    def repair(self):
+        """
+        DESCRIPTION:
+            Repairs the existing repo.
+            Notes:
+                * The method checks for the corresponding missing database objects which are
+                  required for FeatureStore. If any of the database object is not available,
+                  then it tries to create the object.
+                * The method repairs only the underlying tables and not data inside the
+                  corresponding table.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            bool
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            # Example 1: Repair FeatureStore repo 'vfs_v1'.
+            # Create FeatureStore for repo 'vfs_v1'.
+            >>> from teradataml import FeatureStore
+            >>> fs = FeatureStore("vfs_v1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+
+            # Drop the data_source_staging table to simulate the missing object.
+            >>> from teradataml import db_drop_table
+            >>> db_drop_table(schema_name='vfs_v1', table_name=EFS_DB_COMPONENTS['data_source_staging'])
+
+            # Verify the missing object by creating FeatureStore again.
+            >>> fs = FeatureStore("vfs_v1")
+            Some of the feature store objects are missing. Run FeatureStore.repair() to create missing objects.
+
+            >>> fs.repair()
+            Successfully repaired the following objects: _efs_data_source_staging
+            True
+        """
+        self._logger.info(f"Starting repair process for FeatureStore repository '{self.__repo}'")
+
+        # Check whether the repo exists or not.
+        if not self.__repo_exists:
+            self._logger.debug(f"Repository '{self.__repo}' does not exist - cannot repair")
+            print("Repo '{}' does not exist. Run FeatureStore.setup() " \
+                  "to create the repo and setup FeatureStore.".format(self.__repo))
+            return False
+
+        # Get all existing EFS tables in the repo
+        existing_tabs = db_list_tables(schema_name=self.__repo, object_name='_efs%')
+        existing_tables = set(existing_tabs['TableName'].tolist())
+        self._logger.debug(f"Found {len(existing_tables)} existing EFS tables: {existing_tables}")
+
+        # Get non-existing tables in the order of EFS_TABLES.values()
+        non_existing_tables = {
+            table_spec: table_name
+            for table_spec, table_name in EFS_TABLES.items()
+            if table_name not in existing_tables
+        }
+        self._logger.debug(f"Missing tables: {list(non_existing_tables.values())}")
+
+        # Get all existing EFS triggers in the repo
+        sql = SQLBundle()._get_sql_query(SQLConstants.SQL_LIST_TRIGGERS).format(self.__repo, '_efs%')
+        existing_triggers = {row[0] for row in execute_sql(sql).fetchall()}
+        self._logger.debug(f"Found {len(existing_triggers)} existing EFS triggers: {existing_triggers}")
+
+        # Get non-existing triggers in the order of EFS_TRIGGERS.values()
+        non_existing_triggers = {
+            trigger_spec: trigger_name
+            for trigger_spec, trigger_name in EFS_TRIGGERS.items()
+            if trigger_name not in existing_triggers
+        }
+        self._logger.debug(f"Missing triggers: {list(non_existing_triggers.values())}")
+
+        # Check if feature_version view exists (it shows up in existing_tables from db_list_tables)
+        feature_version_exists = self.__table_names['feature_version'] in existing_tables
+        self._logger.debug(f"Feature version view exists: {feature_version_exists}")
+
+        # Return False only if all tables, triggers, and views exist
+        if not non_existing_tables and not non_existing_triggers and feature_version_exists:
+            self._logger.info(f"Repository '{self.__repo}' is complete and does not need repair")
+            print("repo '{}' is ready to use and do not need any repair.".format(self.__repo))
+            return False
+
+        failed_creation = []
+        created = []
+        # Iterating over EFS_TABLES based on the non-existing tables
+        for table_spec, table_name in non_existing_tables.items():
+            try:
+                self._logger.debug(f"Creating missing table: {table_name}")
+                execute_sql(table_spec.format(self.__repo, table_name))
+                created.append(table_name)
+            except Exception as e:
+                # If any table creation fails, then add it to the failed list
+                self._logger.debug(f"Failed to create table '{table_name}': {e}")
+                failed_creation.append((f"Table '{table_name}'", str(e)))
+
+        # Iterating over EFS_TRIGGERS based on the non-existing triggers
+        for trigger_spec, trigger_name in non_existing_triggers.items():
+            alter_name = trigger_name.split('_trg')[0]
+            insert_name = self.__repo + '.' + alter_name + '_staging'
+            try:
+                self._logger.debug(f"Creating missing trigger: {trigger_name}")
+                execute_sql(trigger_spec.format(self.__repo, trigger_name,
+                                                alter_name, insert_name))
+                created.append(trigger_name)
+            except Exception as e:
+                # If any trigger creation fails, then add it to the failed list
+                self._logger.debug(f"Failed to create trigger '{trigger_name}': {e}")
+                failed_creation.append((f"Trigger '{trigger_name}'", str(e)))
+
+        # Create feature versions view if it doesn't exist
+        if not feature_version_exists:
+            try:
+                self._logger.debug("Creating missing feature versions view")
+                sql = EFS_FEATURE_VERSION.format(self.__repo,
+                                                 EFS_DB_COMPONENTS['feature_version'],
+                                                 self.__repo,
+                                                 self.__table_names['feature_process'])
+                execute_sql(sql)
+                created.append(EFS_DB_COMPONENTS['feature_version'])
+            except Exception as e:
+                self._logger.debug(f"Failed to create feature versions view: {e}")
+                failed_creation.append((f"View '{EFS_DB_COMPONENTS['feature_version']}'", str(e)))
+
+        # If any of the table or trigger creation fails, then return False
+        if failed_creation:
+            self._logger.debug(f"Repair completed with failures: {len(failed_creation)} objects could not be created")
+            print("The following objects could not be repaired:")
+            for obj, reason in failed_creation:
+                print(f" - {obj}: {reason}")
+            return False
+
+        self._logger.info(f"Repair completed successfully: created {len(created)} objects: {created}")
+        print("Successfully repaired the following objects: {}".format(", ".join(created)))
+        return True
+
+    def list_features(self, archived=False) -> DataFrame:
+        """
+        DESCRIPTION:
+            List all the features.
+
+        PARAMETERS:
+            archived:
+                Optional Argument.
+                Specifies whether to list effective features or archived features.
+                When set to False, effective features in FeatureStore are listed,
+                otherwise, archived features are listed.
+                Default Value: False
+                Types: bool
+
+        RETURNS:
+            teradataml DataFrame
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            >>> from teradataml import DataFrame, FeatureStore, load_example_data
+            # Create teradataml DataFrame.
+            >>> load_example_data("dataframe", "sales")
+            >>> df = DataFrame("sales")
+
+            # Create FeatureStore for repo 'vfs_v1'.
+            >>> fs = FeatureStore("vfs_v1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
+            # Create a FeatureGroup from teradataml DataFrame.
+            >>> fg = FeatureGroup.from_DataFrame(name='sales',
+            ... entity_columns='accounts',
+            ... df=df,
+            ... timestamp_column='datetime')
+            # Apply the FeatureGroup to FeatureStore.
+            >>> fs.apply(fg)
+            True
+
+            # Example 1: List all the effective Features in the repo 'vfs_v1'.
+            >>> fs.list_features()
+            id column_name description tags data_type feature_type status creation_time modified_time group_name
+            name data_domain
+            Apr ALICE 4 Apr None None BIGINT CONTINUOUS ACTIVE 2025-07-28 03:17:31.262501 None sales
+            Jan ALICE 2 Jan None None BIGINT CONTINUOUS ACTIVE 2025-07-28 03:17:30.056273 None sales
+            Mar ALICE 3 Mar None None BIGINT CONTINUOUS ACTIVE 2025-07-28 03:17:30.678060 None sales
+            Feb ALICE 1 Feb None None FLOAT CONTINUOUS ACTIVE 2025-07-28 03:17:29.403242 None sales
+
+            # Example 2: List all the archived Features in the repo 'vfs_v1'.
+            # Note: Feature can only be archived when it is not associated with any Group.
+            # Let's remove Feature 'Feb' from FeatureGroup.
+            >>> fg.remove_feature(fs.get_feature('Feb'))
+            True
+
+            # Apply the modified FeatureGroup to FeatureStore.
+            >>> fs.apply(fg)
+            True
+
+            # Archive Feature 'Feb'.
+            >>> fs.archive_feature('Feb')
+            Feature 'Feb' is archived.
+            True
+
+            # List all the archived Features in the repo 'vfs_v1'.
+            >>> fs.list_features(archived=True)
+            id name data_domain column_name description tags data_type feature_type status creation_time modified_time archived_time group_name
+            0 1 Feb ALICE Feb None None FLOAT CONTINUOUS ACTIVE 2025-07-28 03:17:29.403242 None 2025-07-28 03:19:58.950000 sales
+            >>>
+        """
+        self._logger.info(f"Listing features from repository '{self.__repo}', data_domain '{self.__data_domain}', archived: {archived}")
+        result = self.__get_archived_features_df() if archived else self.__get_features_df()
+        self._logger.debug(f"Retrieved features:\n{result}")
+        return result
+
+    def list_entities(self, archived=False) -> DataFrame:
+        """
+        DESCRIPTION:
+            List all the entities.
+
+        PARAMETERS:
+            archived:
+                Optional Argument.
+                Specifies whether to list effective entities or archived entities.
+                When set to False, effective entities in FeatureStore are listed,
+                otherwise, archived entities are listed.
+                Default Value: False
+                Types: bool
+
+        RETURNS:
+            teradataml DataFrame
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            >>> from teradataml import DataFrame, FeatureStore, load_example_data
+            # Create FeatureStore for repo 'vfs_v1'.
+            >>> fs = FeatureStore("vfs_v1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
+            # Create teradataml DataFrame.
+            >>> load_example_data("dataframe", "sales")
+            >>> df = DataFrame("sales")
+
+            # Create a FeatureGroup from teradataml DataFrame.
+            >>> fg = FeatureGroup.from_DataFrame(name='sales',
+            ... entity_columns='accounts',
+            ... df=df,
+            ... timestamp_column='datetime')
+            # Apply the FeatureGroup to FeatureStore.
+            >>> fs.apply(fg)
+            True
+
+            # Example 1: List all the effective Entities in the repo 'vfs_v1'.
+            >>> fs.list_entities()
+            description creation_time modified_time entity_column
+            name data_domain
+            sales ALICE None 2025-07-28 03:17:31.558796 2025-07-28 03:19:41.233953 accounts
+            >>>
+
+            # Example 2: List all the archived Entities in the repo 'vfs_v1'.
+            # Note: Entity cannot be archived if it is a part of FeatureGroup.
+            # First create another Entity, and update FeatureGroup with
+            # other Entity. Then archive Entity 'sales'.
+            >>> entity = Entity('store_sales', columns=df.accounts)
+            # Update new entity to FeatureGroup.
+            >>> fg.apply(entity)
+            True
+
+            # Update FeatureGroup to FeatureStore. This will update Entity
+            # from 'sales' to 'store_sales' for FeatureGroup 'sales'.
+            >>> fs.apply(fg)
+            True
+
+            # Let's archive Entity 'sales' since it is not part of any FeatureGroup.
+            >>> fs.archive_entity('sales')
+            Entity 'sales' is archived.
+            True
+            >>>
+
+            # List the archived entities.
+            >>> fs.list_entities(archived=True)
+            description creation_time modified_time entity_column
+            name data_domain
+            store_sales ALICE None 2025-07-28 03:23:40.322424 None accounts
+            >>>
+        """
+        self._logger.info(f"Listing entities from repository '{self.__repo}', data_domain '{self.__data_domain}', archived: {archived}")
+        result = self.__get_archived_entity_df() if archived else self.__get_entity_df()
+        self._logger.debug(f"Retrieved entities:\n{result}")
+        return result
+
+    def list_data_sources(self, archived=False) -> DataFrame:
+        """
+        DESCRIPTION:
+            List all the Data Sources.
+
+        PARAMETERS:
+            archived:
+                Optional Argument.
+                Specifies whether to list effective data sources or archived data sources.
+                When set to False, effective data sources in FeatureStore are listed,
+                otherwise, archived data sources are listed.
+                Default Value: False
+                Types: bool
+
+        RETURNS:
+            teradataml DataFrame
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            >>> from teradataml import DataSource, FeatureStore, load_example_data
+            # Create teradataml DataFrame.
+            >>> load_example_data("dataframe", "admissions_train")
+            >>> admissions = DataFrame("admissions_train")
+
+            # Create FeatureStore for repo 'vfs_v1'.
+            >>> fs = FeatureStore("vfs_v1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
+            # Create DataSource using teradataml DataFrame.
+            >>> ds = DataSource(name='admissions', source=admissions)
+            # Apply the DataSource to FeatureStore.
+            >>> fs.apply(ds)
+            True
+
+            # Example 1: List all the effective DataSources in the repo 'vfs_v1'.
+            >>> fs.list_data_sources()
+            description timestamp_column source creation_time modified_time
+            name data_domain
+            admissions ALICE None None select * from "admissions_train" 2025-07-28 03:26:53.507807 None
+
+            # Example 2: List all the archived DataSources in the repo 'vfs_v1'.
+            # Let's first archive the DataSource.
+            >>> fs.archive_data_source('admissions')
+            DataSource 'admissions' is archived.
+            True
+
+            # List archived DataSources.
+            >>> fs.list_data_sources(archived=True)
+            name data_domain description timestamp_column source creation_time modified_time archived_time
+            0 admissions ALICE None None select * from "admissions_train" 2025-07-28 03:26:53.507807 None 2025-07-28 03:28:17.160000
+            >>>
+        """
+        self._logger.info(f"Listing data sources from repository '{self.__repo}', data_domain '{self.__data_domain}', archived: {archived}")
+        result = self.__get_archived_data_source_df() if archived else self.__get_data_source_df()
+        self._logger.debug(f"Retrieved data sources:\n{result}")
+        return result
+
+    def list_feature_groups(self, archived=False) -> DataFrame:
+        """
+        DESCRIPTION:
+            List all the FeatureGroups.
+
+        PARAMETERS:
+            archived:
+                Optional Argument.
+                Specifies whether to list effective feature groups or archived feature groups.
+                When set to False, effective feature groups in FeatureStore are listed,
+                otherwise, archived feature groups are listed.
+                Default Value: False
+                Types: bool
+
+        RETURNS:
+            teradataml DataFrame
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            >>> from teradataml import FeatureGroup, FeatureStore, load_example_data
+            # Create teradataml DataFrame.
+            >>> load_example_data("dataframe", "admissions_train")
+            >>> admissions=DataFrame("admissions_train")
+
+            # Create FeatureStore for repo 'vfs_v1'.
+            >>> fs = FeatureStore("vfs_v1")
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+            # Setup FeatureStore for this repository.
+            >>> fs.setup()
+            True
+
+            # Create a FeatureGroup from DataFrame.
+            >>> fg = FeatureGroup.from_DataFrame("admissions", df=admissions, entity_columns='id')
+            # Apply FeatureGroup to FeatureStore.
+            >>> fs.apply(fg)
+            True
+
+            # Example 1: List all the effective FeatureGroups in the repo 'vfs_v1'.
+            >>> fs.list_feature_groups()
+            description data_source_name entity_name creation_time modified_time
+            name data_domain
+            admissions ALICE None admissions admissions 2025-07-28 03:30:04.115331 None
+
+            # Example 2: List all the archived FeatureGroups in the repo 'vfs_v1'.
+            # Let's first archive the FeatureGroup.
+            >>> fs.archive_feature_group("admissions")
+            True
+
+            # List archived FeatureGroups.
+            >>> fs.list_feature_groups(archived=True)
+            name data_domain description data_source_name entity_name creation_time modified_time archived_time
+            0 admissions ALICE None admissions admissions 2025-07-28 03:30:04.115331 None 2025-07-28 03:31:04.550000
+            >>>
+        """
+        self._logger.info(f"Listing feature groups from repository '{self.__repo}', data_domain '{self.__data_domain}', archived: {archived}")
+        result = self.__get_archived_feature_group_df() if archived else self.__get_feature_group_df()
+        self._logger.debug(f"Retrieved feature groups:\n{result}")
+        return result
+
+    def list_data_domains(self) -> DataFrame:
+        """
+        DESCRIPTION:
+            Lists all the data domains.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            teradataml DataFrame
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            # Example 1: List all the data domains in the repo 'vfs_v1'.
+            >>> from teradataml import FeatureStore
+            # Create FeatureStore for repo 'vfs_v1' with data_domain 'd1'.
+            >>> fs = FeatureStore("vfs_v1", data_domain='d1')
+            Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
+
+            # List all the data domains in the repo 'vfs_v1'.
+            >>> fs.list_data_domains()
+            name created_time
+            0 d1 2025-04-30 11:21:40.123456
+        """
+        self._logger.info("Listing all the data domains in the repo '{}'.".format(self.__repo))
+        res = self.__get_data_domain_df()
+
+        self._logger.debug("Data domains listed: \n{}".format(res))
+        return res
+
+    def list_feature_processes(self, archived=False) -> DataFrame:
+        """
+        DESCRIPTION:
+            Lists all the feature processes.
+
+        PARAMETERS:
+            archived:
+                Optional Argument.
+                Specifies whether to retrieve archived feature processes or not.
+                When set to True, archived feature processes in FeatureStore are listed.
+                Otherwise, all feature processes are listed.
+                Default Value: False
+                Types: bool
+
+        RETURNS:
+            teradataml DataFrame
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            # Example 1: List all the feature processes in the repo 'vfs_v1'.
+            >>> from teradataml import FeatureStore
+
+            # Create FeatureStore 'vfs_v1' or use existing one.
+            >>> fs = FeatureStore("vfs_v1")
+            FeatureStore is ready to use.
+
+            # Load the sales data.
+            >>> load_example_data("dataframe", "sales")
+            >>> df = DataFrame("sales")
|
|
1188
|
+
|
|
1189
|
+
# Create a feature process.
|
|
1190
|
+
>>> from teradataml import FeatureProcess
|
|
1191
|
+
>>> fp = FeatureProcess(repo="vfs_v1",
|
|
1192
|
+
... data_domain='sales',
|
|
1193
|
+
... object=df,
|
|
1194
|
+
... entity="accounts",
|
|
1195
|
+
... features=["Jan", "Feb", "Mar", "Apr"])
|
|
1196
|
+
>>> fp.run()
|
|
1197
|
+
Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' started.
|
|
1198
|
+
Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' completed.
|
|
1199
|
+
|
|
1200
|
+
# List all the feature processes in the repo 'vfs_v1'.
|
|
1201
|
+
>>> fs.list_feature_processes()
|
|
1202
|
+
description data_domain process_type data_source entity_id feature_names feature_ids valid_start valid_end
|
|
1203
|
+
process_id
|
|
1204
|
+
5747082b-4acb-11f0-a2d7-f020ffe7fe09 sales denormalized view "sales" accounts Apr, Feb, Jan, Mar None 2025-06-16 16:02:55.260000+00: 9999-12-31 23:59:59.999999+00:
|
|
1205
|
+
|
|
1206
|
+
# Example 2: List all the archived feature processes in the repo 'vfs_v1'.
|
|
1207
|
+
|
|
1208
|
+
# Let's check the archived feature processes before archiving the feature process.
|
|
1209
|
+
>>> fs.list_feature_processes(archived=True)
|
|
1210
|
+
process_id start_time end_time status filter as_of_start as_of_end failure_reason
|
|
1211
|
+
|
|
1212
|
+
# Archive the feature process by passing the process_id.
|
|
1213
|
+
>>> fs.archive_feature_process('5747082b-4acb-11f0-a2d7-f020ffe7fe09')
|
|
1214
|
+
Feature 'Feb' is archived from table 'FS_T_6003dc24_375e_7fd6_46f0_eeb868305c4a'.
|
|
1215
|
+
Feature 'Feb' is archived from metadata.
|
|
1216
|
+
Feature 'Jan' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
|
|
1217
|
+
Feature 'Jan' is archived from metadata.
|
|
1218
|
+
Feature 'Mar' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
|
|
1219
|
+
Feature 'Mar' is archived from metadata.
|
|
1220
|
+
Feature 'Apr' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
|
|
1221
|
+
Feature 'Apr' is archived from metadata.
|
|
1222
|
+
FeatureProcess with process id '5747082b-4acb-11f0-a2d7-f020ffe7fe09' is archived.
|
|
1223
|
+
True
|
|
1224
|
+
|
|
1225
|
+
# List all the archived feature processes in the repo 'vfs_v1'.
|
|
1226
|
+
>>> fs.list_feature_processes(archived=True)
|
|
1227
|
+
description data_domain process_type data_source entity_id feature_names feature_ids valid_start valid_end
|
|
1228
|
+
process_id
|
|
1229
|
+
5747082b-4acb-11f0-a2d7-f020ffe7fe09 sales denormalized view "sales" accounts Apr, Feb, Jan, Mar None 2025-06-16 16:02:55.260000+00: 2025-06-16 16:04:32.260000+00:
|
|
1230
|
+
|
|
1231
|
+
"""
|
|
1232
|
+
self._logger.info(f"Listing feature processes from repository '{self.__repo}', data_domain '{self.__data_domain}', archived: {archived}")
|
|
1233
|
+
|
|
1234
|
+
validate_params = []
|
|
1235
|
+
validate_params.append(["archived", archived, True, bool, True])
|
|
1236
|
+
# Validate argument types
|
|
1237
|
+
_Validators._validate_function_arguments(validate_params)
|
|
1238
|
+
|
|
1239
|
+
f_process_df = self.__get_without_valid_period_df(self.__get_feature_process_df())
|
|
1240
|
+
f_process_df = f_process_df[f_process_df.data_domain == self.__data_domain]
|
|
1241
|
+
|
|
1242
|
+
if archived:
|
|
1243
|
+
# Filter out the active feature process. Only archived features are returned.
|
|
1244
|
+
f_process_df = f_process_df[(Col("valid_end") <= Col('current_timestamp'))]
|
|
1245
|
+
self._logger.debug("Filtered to show only archived feature processes")
|
|
1246
|
+
|
|
1247
|
+
self._logger.debug(f"Retrieved feature processes:\n{f_process_df}")
|
|
1248
|
+
return f_process_df
|
|
1249
|
+
|
|
1250
|
+
def list_feature_runs(self):
|
|
1251
|
+
"""
|
|
1252
|
+
DESCRIPTION:
|
|
1253
|
+
Lists all the feature runs in the FeatureStore.
|
|
1254
|
+
|
|
1255
|
+
PARAMETERS:
|
|
1256
|
+
None
|
|
1257
|
+
|
|
1258
|
+
RETURNS:
|
|
1259
|
+
teradataml DataFrame
|
|
1260
|
+
|
|
1261
|
+
RAISES:
|
|
1262
|
+
None
|
|
1263
|
+
|
|
1264
|
+
EXAMPLES:
|
|
1265
|
+
# Example 1: List all the feature runs in the repo 'vfs_v1'.
|
|
1266
|
+
>>> from teradataml import FeatureStore
|
|
1267
|
+
|
|
1268
|
+
# Create a FeatureStore 'vfs_v1' or use existing one.
|
|
1269
|
+
>>> fs = FeatureStore("vfs_v1")
|
|
1270
|
+
FeatureStore is ready to use.
|
|
1271
|
+
|
|
1272
|
+
# Load the sales data.
|
|
1273
|
+
>>> load_example_data("dataframe", "sales")
|
|
1274
|
+
>>> df = DataFrame("sales")
|
|
1275
|
+
|
|
1276
|
+
# Create a feature process.
|
|
1277
|
+
>>> from teradataml import FeatureProcess
|
|
1278
|
+
>>> fp = FeatureProcess(repo="vfs_v1",
|
|
1279
|
+
... data_domain='test_domain',
|
|
1280
|
+
... object=df,
|
|
1281
|
+
... entity='accounts',
|
|
1282
|
+
... features=['Mar', 'Apr'])
|
|
1283
|
+
>>> fp.run(filters=[df.accounts=='Alpha Co', "accounts='Jones LLC'"])
|
|
1284
|
+
Process '11b62599-692f-11f0-ad19-f020ffe7fe09' started.
|
|
1285
|
+
Ingesting the features for filter 'accounts = 'Alpha Co'' to catalog.
|
|
1286
|
+
Ingesting the features for filter 'accounts='Jones LLC'' to catalog.
|
|
1287
|
+
Process '11b62599-692f-11f0-ad19-f020ffe7fe09' completed.
|
|
1288
|
+
True
|
|
1289
|
+
|
|
1290
|
+
# List all the feature runs in the repo 'vfs_v1'.
|
|
1291
|
+
>>> fs.list_feature_runs()
|
|
1292
|
+
process_id data_domain start_time end_time status filter as_of_start as_of_end failure_reason
|
|
1293
|
+
run_id
|
|
1294
|
+
1 11b62599-692f-11f0-ad19-f020ffe7fe09 test_domain 2025-07-25 08:12:13.001968 2025-07-25 08:12:13.001968 completed accounts = 'Alpha Co', accounts='Jones LLC' None None None
|
|
1295
|
+
"""
|
|
1296
|
+
self._logger.info(f"Listing feature runs from repository '{self.__repo}'")
|
|
1297
|
+
result = self.__get_feature_runs_df()
|
|
1298
|
+
self._logger.debug(f"Retrieved feature runs:\n{result}")
|
|
1299
|
+
return result
|
|
1300
|
+
|
|
1301
|
+
def list_dataset_catalogs(self) -> DataFrame:
|
|
1302
|
+
"""
|
|
1303
|
+
DESCRIPTION:
|
|
1304
|
+
Lists all the dataset catalogs.
|
|
1305
|
+
|
|
1306
|
+
PARAMETERS:
|
|
1307
|
+
None
|
|
1308
|
+
|
|
1309
|
+
RETURNS:
|
|
1310
|
+
teradataml DataFrame
|
|
1311
|
+
|
|
1312
|
+
RAISES:
|
|
1313
|
+
None
|
|
1314
|
+
|
|
1315
|
+
EXAMPLES:
|
|
1316
|
+
# Example 1: List all the dataset catalogs in the repo 'vfs_v1'.
|
|
1317
|
+
>>> from teradataml import FeatureStore
|
|
1318
|
+
|
|
1319
|
+
# Create FeatureStore 'vfs_v1' or use existing one.
|
|
1320
|
+
>>> fs = FeatureStore("vfs_v1", data_domain='sales')
|
|
1321
|
+
FeatureStore is ready to use.
|
|
1322
|
+
|
|
1323
|
+
# Load the sales data.
|
|
1324
|
+
>>> load_example_data("dataframe", "sales")
|
|
1325
|
+
>>> df = DataFrame("sales")
|
|
1326
|
+
|
|
1327
|
+
# Create a feature process.
|
|
1328
|
+
>>> from teradataml import FeatureProcess
|
|
1329
|
+
>>> fp = FeatureProcess(repo="vfs_v1",
|
|
1330
|
+
... data_domain='sales',
|
|
1331
|
+
... object=df,
|
|
1332
|
+
... entity="accounts",
|
|
1333
|
+
... features=["Jan", "Feb", "Mar", "Apr"])
|
|
1334
|
+
>>> fp.run()
|
|
1335
|
+
Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' started.
|
|
1336
|
+
Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' completed.
|
|
1337
|
+
|
|
1338
|
+
# create a dataset catalog.
|
|
1339
|
+
>>> from teradataml import DatasetCatalog
|
|
1340
|
+
>>> dc = DatasetCatalog(repo='vfs_v1', data_domain='sales')
|
|
1341
|
+
>>> dataset = dc.build_dataset(entity='accounts',
|
|
1342
|
+
... selected_features = {
|
|
1343
|
+
... 'Jan': '5747082b-4acb-11f0-a2d7-f020ffe7fe09',
|
|
1344
|
+
... 'Feb': '5747082b-4acb-11f0-a2d7-f020ffe7fe09'},
|
|
1345
|
+
... view_name='ds_jan_feb',
|
|
1346
|
+
... description='Dataset with Jan and Feb features')
|
|
1347
|
+
|
|
1348
|
+
# List all the dataset catalogs in the repo 'vfs_v1'.
|
|
1349
|
+
>>> fs.list_dataset_catalogs()
|
|
1350
|
+
data_domain name entity_name database_name description valid_start valid_end
|
|
1351
|
+
id
|
|
1352
|
+
4f763a7b-8920-448c-87af-432e7d36c9cb sales ds_jan_feb accounts vfs_v1 Dataset with Jan and Feb features 2025-06-16 16:15:17.577637+00: 9999-12-31 23:59:59.999999+00:
|
|
1353
|
+
"""
|
|
1354
|
+
self._logger.info(f"Listing dataset catalogs from repository '{self.__repo}'")
|
|
1355
|
+
result = self.__get_without_valid_period_df(self.__get_dataset_catalog_df())
|
|
1356
|
+
self._logger.debug(f"Retrieved dataset catalogs:\n{result}")
|
|
1357
|
+
return result
|
|
1358
|
+
|
|
1359
|
+
def get_feature(self, name):
|
|
1360
|
+
"""
|
|
1361
|
+
DESCRIPTION:
|
|
1362
|
+
Retrieve the feature.
|
|
1363
|
+
|
|
1364
|
+
PARAMETERS:
|
|
1365
|
+
name:
|
|
1366
|
+
Required Argument.
|
|
1367
|
+
Specifies the name of the feature to get.
|
|
1368
|
+
Types: str
|
|
1369
|
+
|
|
1370
|
+
RETURNS:
|
|
1371
|
+
Feature.
|
|
1372
|
+
|
|
1373
|
+
RAISES:
|
|
1374
|
+
TeradataMLException
|
|
1375
|
+
|
|
1376
|
+
EXAMPLES:
|
|
1377
|
+
>>> from teradataml import DataFrame, FeatureStore, load_example_data
|
|
1378
|
+
# Create DataFrame on sales data.
|
|
1379
|
+
>>> load_example_data("dataframe", "sales")
|
|
1380
|
+
>>> df = DataFrame("sales")
|
|
1381
|
+
>>> df
|
|
1382
|
+
Feb Jan Mar Apr datetime
|
|
1383
|
+
accounts
|
|
1384
|
+
Orange Inc 210.0 NaN NaN 250.0 04/01/2017
|
|
1385
|
+
Jones LLC 200.0 150.0 140.0 180.0 04/01/2017
|
|
1386
|
+
Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
|
|
1387
|
+
Alpha Co 210.0 200.0 215.0 250.0 04/01/2017
|
|
1388
|
+
Yellow Inc 90.0 NaN NaN NaN 04/01/2017
|
|
1389
|
+
|
|
1390
|
+
# Create a FeatureStore for repo 'vfs_v1'.
|
|
1391
|
+
>>> fs = FeatureStore("vfs_v1")
|
|
1392
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
1393
|
+
# Setup FeatureStore for this repository.
|
|
1394
|
+
>>> fs.setup()
|
|
1395
|
+
True
|
|
1396
|
+
|
|
1397
|
+
# Create Feature for column 'Mar' with name 'sales_mar'.
|
|
1398
|
+
>>> feature = Feature('sales_mar', column=df.Mar)
|
|
1399
|
+
|
|
1400
|
+
# Apply the Feature to FeatureStore.
|
|
1401
|
+
>>> fs.apply(feature)
|
|
1402
|
+
True
|
|
1403
|
+
|
|
1404
|
+
# Get the feature 'sales_mar' from repo 'vfs_v1'.
|
|
1405
|
+
>>> feature = fs.get_feature('sales_mar')
|
|
1406
|
+
>>> feature
|
|
1407
|
+
Feature(name=sales_mar)
|
|
1408
|
+
"""
|
|
1409
|
+
self._logger.info(f"Getting feature '{name}' from repository '{self.__repo}', data_domain '{self.__data_domain}'")
|
|
1410
|
+
|
|
1411
|
+
argument_validation_params = []
|
|
1412
|
+
argument_validation_params.append(["name", name, False, (str), True])
|
|
1413
|
+
|
|
1414
|
+
# Validate argument types
|
|
1415
|
+
_Validators._validate_function_arguments(argument_validation_params)
|
|
1416
|
+
|
|
1417
|
+
# Check if the feature exists in the current data domain.
|
|
1418
|
+
df = self.__get_features_wog_df()
|
|
1419
|
+
df = df[(df['name'] == name) &
|
|
1420
|
+
(df['data_domain'] == self.__data_domain)]
|
|
1421
|
+
|
|
1422
|
+
# If no records found, check if the feature exists in any domain.
|
|
1423
|
+
if df.shape[0] == 0:
|
|
1424
|
+
self._logger.debug(f"Feature '{name}' not found in current data domain '{self.__data_domain}', checking other domains")
|
|
1425
|
+
res = _FSUtils._get_data_domains(self.__repo, name, 'feature')
|
|
1426
|
+
if res:
|
|
1427
|
+
self._logger.debug(f"Feature '{name}' exists in other domains: {res}")
|
|
1428
|
+
msg_code = MessageCodes.EFS_OBJECT_IN_OTHER_DOMAIN
|
|
1429
|
+
error_msg = Messages.get_message(msg_code, "Feature", "name '{}'".format(name),
|
|
1430
|
+
self.__data_domain, res)
|
|
1431
|
+
else:
|
|
1432
|
+
self._logger.debug(f"Feature '{name}' does not exist in any domain")
|
|
1433
|
+
msg_code = MessageCodes.EFS_OBJECT_NOT_EXIST
|
|
1434
|
+
error_msg = Messages.get_message(msg_code, "Feature", "name '{}'".format(name),
|
|
1435
|
+
self.__data_domain)
|
|
1436
|
+
raise TeradataMlException(error_msg, msg_code)
|
|
1437
|
+
|
|
1438
|
+
self._logger.debug(f"Successfully retrieved feature '{name}' from data domain '{self.__data_domain}'")
|
|
1439
|
+
return Feature._from_df(df)
|
|
1440
|
+
|
|
1441
|
+
def get_group_features(self, group_name):
|
|
1442
|
+
"""
|
|
1443
|
+
DESCRIPTION:
|
|
1444
|
+
Get the Features from the given feature group name.
|
|
1445
|
+
|
|
1446
|
+
PARAMETERS:
|
|
1447
|
+
group_name:
|
|
1448
|
+
Required Argument.
|
|
1449
|
+
Specifies the name of the group the feature belongs to.
|
|
1450
|
+
Types: str
|
|
1451
|
+
|
|
1452
|
+
RETURNS:
|
|
1453
|
+
List of Feature objects.
|
|
1454
|
+
|
|
1455
|
+
RAISES:
|
|
1456
|
+
TeradataMLException
|
|
1457
|
+
|
|
1458
|
+
EXAMPLES:
|
|
1459
|
+
>>> from teradataml import DataFrame, FeatureStore, load_example_data
|
|
1460
|
+
|
|
1461
|
+
# Create DataFrame on sales data.
|
|
1462
|
+
>>> load_example_data("dataframe", "sales")
|
|
1463
|
+
>>> df = DataFrame("sales")
|
|
1464
|
+
>>> df
|
|
1465
|
+
Feb Jan Mar Apr datetime
|
|
1466
|
+
accounts
|
|
1467
|
+
Orange Inc 210.0 NaN NaN 250.0 04/01/2017
|
|
1468
|
+
Jones LLC 200.0 150.0 140.0 180.0 04/01/2017
|
|
1469
|
+
Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
|
|
1470
|
+
Alpha Co 210.0 200.0 215.0 250.0 04/01/2017
|
|
1471
|
+
Yellow Inc 90.0 NaN NaN NaN 04/01/2017
|
|
1472
|
+
|
|
1473
|
+
# Create FeatureStore for repo 'vfs_v1'.
|
|
1474
|
+
>>> fs = FeatureStore("vfs_v1")
|
|
1475
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
1476
|
+
# Setup FeatureStore for this repository.
|
|
1477
|
+
>>> fs.setup()
|
|
1478
|
+
True
|
|
1479
|
+
|
|
1480
|
+
# Create FeatureGroup with name 'sales' from DataFrame.
|
|
1481
|
+
>>> fg = FeatureGroup.from_DataFrame(
|
|
1482
|
+
... name="sales", df=df, entity_columns="accounts", timestamp_column="datetime")
|
|
1483
|
+
# Apply the FeatureGroup to FeatureStore.
|
|
1484
|
+
>>> fs.apply(fg)
|
|
1485
|
+
True
|
|
1486
|
+
|
|
1487
|
+
# Get all the features belonging to the group 'sales' from repo 'vfs_v1'.
|
|
1488
|
+
>>> features = fs.get_group_features('sales')
|
|
1489
|
+
>>> features
|
|
1490
|
+
[Feature(name=Jan), Feature(name=Feb), Feature(name=Apr), Feature(name=Mar)]
|
|
1491
|
+
>>>
|
|
1492
|
+
"""
|
|
1493
|
+
argument_validation_params = []
|
|
1494
|
+
argument_validation_params.append(["group_name", group_name, False, (str), True])
|
|
1495
|
+
|
|
1496
|
+
# Validate argument types
|
|
1497
|
+
_Validators._validate_function_arguments(argument_validation_params)
|
|
1498
|
+
|
|
1499
|
+
self._logger.info(f"Getting features for group '{group_name}' from repository '{self.__repo}', data_domain '{self.__data_domain}'")
|
|
1500
|
+
|
|
1501
|
+
# Select active features.
|
|
1502
|
+
features_df = self.__get_features_df()
|
|
1503
|
+
features_df = features_df[((features_df.status != FeatureStatus.INACTIVE.name) &
|
|
1504
|
+
(features_df.group_name == group_name) &
|
|
1505
|
+
(features_df.data_domain == self.__data_domain))]
|
|
1506
|
+
|
|
1507
|
+
# Check if a feature with that group name exists or not. If not, raise error.
|
|
1508
|
+
if features_df.shape[0] == 0:
|
|
1509
|
+
self._logger.debug(f"No features found for group '{group_name}' in current data domain '{self.__data_domain}', checking other domains")
|
|
1510
|
+
res = _FSUtils._get_data_domains(self.__repo, group_name, 'group_features')
|
|
1511
|
+
if res:
|
|
1512
|
+
self._logger.debug(f"Features for group '{group_name}' exist in other domains: {res}")
|
|
1513
|
+
msg_code = MessageCodes.EFS_OBJECT_IN_OTHER_DOMAIN
|
|
1514
|
+
error_msg = Messages.get_message(msg_code, "Features", "group name '{}'".format(group_name),
|
|
1515
|
+
self.__data_domain, res)
|
|
1516
|
+
else:
|
|
1517
|
+
self._logger.debug(f"No features found for group '{group_name}' in any domain")
|
|
1518
|
+
msg_code = MessageCodes.EFS_OBJECT_NOT_EXIST
|
|
1519
|
+
error_msg = Messages.get_message(msg_code, "Features", "group name '{}'".format(group_name),
|
|
1520
|
+
self.__data_domain)
|
|
1521
|
+
raise TeradataMlException(error_msg, msg_code)
|
|
1522
|
+
|
|
1523
|
+
self._logger.debug(f"Successfully retrieved features for group '{group_name}':\n{features_df}")
|
|
1524
|
+
return Feature._from_df(features_df)
|
|
1525
|
+
|
|
1526
|
+
def get_feature_group(self, name):
|
|
1527
|
+
"""
|
|
1528
|
+
DESCRIPTION:
|
|
1529
|
+
Retrieve the FeatureGroup using name.
|
|
1530
|
+
|
|
1531
|
+
PARAMETERS:
|
|
1532
|
+
name:
|
|
1533
|
+
Required Argument.
|
|
1534
|
+
Specifies the name of the feature group to be retrieved.
|
|
1535
|
+
Types: str
|
|
1536
|
+
|
|
1537
|
+
RETURNS:
|
|
1538
|
+
Object of FeatureGroup
|
|
1539
|
+
|
|
1540
|
+
RAISES:
|
|
1541
|
+
TeradataMLException
|
|
1542
|
+
|
|
1543
|
+
EXAMPLES:
|
|
1544
|
+
>>> from teradataml import DataFrame, FeatureStore, load_example_data
|
|
1545
|
+
# Create DataFrame on sales data.
|
|
1546
|
+
>>> load_example_data("dataframe", "sales")
|
|
1547
|
+
>>> df = DataFrame("sales")
|
|
1548
|
+
>>> df
|
|
1549
|
+
Feb Jan Mar Apr datetime
|
|
1550
|
+
accounts
|
|
1551
|
+
Orange Inc 210.0 NaN NaN 250.0 04/01/2017
|
|
1552
|
+
Jones LLC 200.0 150.0 140.0 180.0 04/01/2017
|
|
1553
|
+
Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
|
|
1554
|
+
Alpha Co 210.0 200.0 215.0 250.0 04/01/2017
|
|
1555
|
+
Yellow Inc 90.0 NaN NaN NaN 04/01/2017
|
|
1556
|
+
|
|
1557
|
+
# Create FeatureStore for repo 'vfs_v1'.
|
|
1558
|
+
>>> fs = FeatureStore("vfs_v1")
|
|
1559
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
1560
|
+
# Setup FeatureStore for this repository.
|
|
1561
|
+
>>> fs.setup()
|
|
1562
|
+
True
|
|
1563
|
+
|
|
1564
|
+
# Create FeatureGroup with name 'sales' from DataFrame.
|
|
1565
|
+
>>> fg = FeatureGroup.from_DataFrame(
|
|
1566
|
+
... name="sales", df=df, entity_columns="accounts", timestamp_column="datetime")
|
|
1567
|
+
# Apply the FeatureGroup to FeatureStore.
|
|
1568
|
+
>>> fs.apply(fg)
|
|
1569
|
+
True
|
|
1570
|
+
|
|
1571
|
+
# Get FeatureGroup with group name 'sales' from repo 'vfs_v1'.
|
|
1572
|
+
>>> fg = fs.get_feature_group('sales')
|
|
1573
|
+
>>> fg
|
|
1574
|
+
FeatureGroup(sales, features=[Feature(name=Jan), Feature(name=Feb), Feature(name=Apr), Feature(name=Mar)], entity=Entity(name=sales), data_source=DataSource(name=sales))
|
|
1575
|
+
>>>
|
|
1576
|
+
"""
|
|
1577
|
+
self._logger.info(f"Getting feature group '{name}' from repository '{self.__repo}', data_domain '{self.__data_domain}'")
|
|
1578
|
+
|
|
1579
|
+
argument_validation_params = []
|
|
1580
|
+
argument_validation_params.append(["name", name, False, (str), True])
|
|
1581
|
+
|
|
1582
|
+
# Validate argument types
|
|
1583
|
+
_Validators._validate_function_arguments(argument_validation_params)
|
|
1584
|
+
|
|
1585
|
+
df = self.list_feature_groups()
|
|
1586
|
+
df = df[(df['name'] == name) &
|
|
1587
|
+
(df['data_domain'] == self.__data_domain)]
|
|
1588
|
+
|
|
1589
|
+
# Check if a feature group with that name exists or not. If not, raise error.
|
|
1590
|
+
if df.shape[0] == 0:
|
|
1591
|
+
self._logger.debug(f"Feature group '{name}' not found in current data domain '{self.__data_domain}', checking other domains")
|
|
1592
|
+
res = _FSUtils._get_data_domains(self.__repo, name, 'feature_group')
|
|
1593
|
+
if res:
|
|
1594
|
+
self._logger.debug(f"Feature group '{name}' exists in other domains: {res}")
|
|
1595
|
+
msg_code = MessageCodes.EFS_OBJECT_IN_OTHER_DOMAIN
|
|
1596
|
+
error_msg = Messages.get_message(msg_code, "FeatureGroup", "name '{}'".format(name),
|
|
1597
|
+
self.__data_domain, res)
|
|
1598
|
+
else:
|
|
1599
|
+
self._logger.debug(f"Feature group '{name}' does not exist in any domain")
|
|
1600
|
+
msg_code = MessageCodes.EFS_OBJECT_NOT_EXIST
|
|
1601
|
+
error_msg = Messages.get_message(msg_code, "FeatureGroup", "name '{}'".format(name),
|
|
1602
|
+
self.__data_domain)
|
|
1603
|
+
raise TeradataMlException(error_msg, msg_code)
|
|
1604
|
+
|
|
1605
|
+
self._logger.debug(f"Successfully retrieved feature group '{name}' from data domain '{self.__data_domain}'")
|
|
1606
|
+
return FeatureGroup._from_df(df,
|
|
1607
|
+
self.__repo,
|
|
1608
|
+
self.__get_features_df(),
|
|
1609
|
+
self.__get_entity_df(),
|
|
1610
|
+
self.__get_data_source_df(),
|
|
1611
|
+
data_domain=self.__data_domain
|
|
1612
|
+
)
|
|
1613
|
+
|
|
1614
|
+
def get_entity(self, name):
|
|
1615
|
+
"""
|
|
1616
|
+
DESCRIPTION:
|
|
1617
|
+
Get the entity from feature store.
|
|
1618
|
+
|
|
1619
|
+
PARAMETERS:
|
|
1620
|
+
name:
|
|
1621
|
+
Required Argument.
|
|
1622
|
+
Specifies the name of the entity.
|
|
1623
|
+
Types: str
|
|
1624
|
+
|
|
1625
|
+
RETURNS:
|
|
1626
|
+
Object of Entity.
|
|
1627
|
+
|
|
1628
|
+
RAISES:
|
|
1629
|
+
None
|
|
1630
|
+
|
|
1631
|
+
EXAMPLES:
|
|
1632
|
+
>>> from teradataml import DataFrame, Entity, FeatureStore, load_example_data
|
|
1633
|
+
# Create DataFrame on admissions data.
|
|
1634
|
+
>>> load_example_data("dataframe", "admissions_train")
|
|
1635
|
+
>>> df = DataFrame("admissions_train")
|
|
1636
|
+
>>> df
|
|
1637
|
+
masters gpa stats programming admitted
|
|
1638
|
+
id
|
|
1639
|
+
34 yes 3.85 Advanced Beginner 0
|
|
1640
|
+
32 yes 3.46 Advanced Beginner 0
|
|
1641
|
+
11 no 3.13 Advanced Advanced 1
|
|
1642
|
+
40 yes 3.95 Novice Beginner 0
|
|
1643
|
+
38 yes 2.65 Advanced Beginner 1
|
|
1644
|
+
36 no 3.00 Advanced Novice 0
|
|
1645
|
+
7 yes 2.33 Novice Novice 1
|
|
1646
|
+
26 yes 3.57 Advanced Advanced 1
|
|
1647
|
+
19 yes 1.98 Advanced Advanced 0
|
|
1648
|
+
13 no 4.00 Advanced Novice 1
|
|
1649
|
+
|
|
1650
|
+
# Create FeatureStore for repo 'vfs_v1'.
|
|
1651
|
+
>>> fs = FeatureStore("vfs_v1")
|
|
1652
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
1653
|
+
# Setup FeatureStore for this repository.
|
|
1654
|
+
>>> fs.setup()
|
|
1655
|
+
True
|
|
1656
|
+
|
|
1657
|
+
# Create Entity for column 'id' with name 'admissions_id'.
|
|
1658
|
+
>>> entity = Entity(name='admissions_id', description="Entity for admissions", columns=df.id)
|
|
1659
|
+
# Apply the Entity to FeatureStore 'vfs_v1'.
|
|
1660
|
+
>>> fs.apply(entity)
|
|
1661
|
+
True
|
|
1662
|
+
|
|
1663
|
+
# Get the Entity 'admissions_id' from repo 'vfs_v1'
|
|
1664
|
+
>>> entity = fs.get_entity('admissions_id')
|
|
1665
|
+
>>> entity
|
|
1666
|
+
Entity(name=admissions_id)
|
|
1667
|
+
"""
|
|
1668
|
+
self._logger.info(f"Getting entity '{name}' from repository '{self.__repo}', data_domain '{self.__data_domain}'")
|
|
1669
|
+
|
|
1670
|
+
argument_validation_params = []
|
|
1671
|
+
argument_validation_params.append(["name", name, False, (str), True])
|
|
1672
|
+
|
|
1673
|
+
# Validate argument types
|
|
1674
|
+
_Validators._validate_function_arguments(argument_validation_params)
|
|
1675
|
+
|
|
1676
|
+
df = self.__get_entity_df()
|
|
1677
|
+
df = df[(df['name'] == name) &
|
|
1678
|
+
(df['data_domain'] == self.__data_domain)]
|
|
1679
|
+
|
|
1680
|
+
# Check if entity with that name exists or not. If not, raise error.
|
|
1681
|
+
if df.shape[0] == 0:
|
|
1682
|
+
res = _FSUtils._get_data_domains(self.__repo, name, 'entity')
|
|
1683
|
+
if res:
|
|
1684
|
+
msg_code = MessageCodes.EFS_OBJECT_IN_OTHER_DOMAIN
|
|
1685
|
+
error_msg = Messages.get_message(msg_code, "Entity", "name '{}'".format(name),
|
|
1686
|
+
self.__data_domain, res)
|
|
1687
|
+
else:
|
|
1688
|
+
msg_code = MessageCodes.EFS_OBJECT_NOT_EXIST
|
|
1689
|
+
error_msg = Messages.get_message(msg_code, "Entity", "name '{}'".format(name),
|
|
1690
|
+
self.__data_domain)
|
|
1691
|
+
raise TeradataMlException(error_msg, msg_code)
|
|
1692
|
+
|
|
1693
|
+
return Entity._from_df(df)
|
|
1694
|
+
|
|
1695
|
+
def get_data_source(self, name):
|
|
1696
|
+
"""
|
|
1697
|
+
DESCRIPTION:
|
|
1698
|
+
Get the data source from feature store.
|
|
1699
|
+
|
|
1700
|
+
PARAMETERS:
|
|
1701
|
+
name:
|
|
1702
|
+
Required Argument.
|
|
1703
|
+
Specifies the name of the data source.
|
|
1704
|
+
Types: str
|
|
1705
|
+
|
|
1706
|
+
RETURNS:
|
|
1707
|
+
Object of DataSource.
|
|
1708
|
+
|
|
1709
|
+
RAISES:
|
|
1710
|
+
TeradataMLException
|
|
1711
|
+
|
|
1712
|
+
EXAMPLES:
|
|
1713
|
+
>>> from teradataml import DataFrame, DataSource, FeatureStore, load_example_data
|
|
1714
|
+
# Create DataFrame on admissions data.
|
|
1715
|
+
>>> load_example_data("dataframe", "admissions_train")
|
|
1716
|
+
>>> df = DataFrame("admissions_train")
|
|
1717
|
+
>>> df
|
|
1718
|
+
masters gpa stats programming admitted
|
|
1719
|
+
id
|
|
1720
|
+
34 yes 3.85 Advanced Beginner 0
|
|
1721
|
+
32 yes 3.46 Advanced Beginner 0
|
|
1722
|
+
11 no 3.13 Advanced Advanced 1
|
|
1723
|
+
40 yes 3.95 Novice Beginner 0
|
|
1724
|
+
38 yes 2.65 Advanced Beginner 1
|
|
1725
|
+
36 no 3.00 Advanced Novice 0
|
|
1726
|
+
7 yes 2.33 Novice Novice 1
|
|
1727
|
+
26 yes 3.57 Advanced Advanced 1
|
|
1728
|
+
19 yes 1.98 Advanced Advanced 0
|
|
1729
|
+
13 no 4.00 Advanced Novice 1
|
|
1730
|
+
|
|
1731
|
+
# Create FeatureStore for repo 'vfs_v1'.
|
|
1732
|
+
>>> fs = FeatureStore("vfs_v1")
|
|
1733
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
1734
|
+
# Setup FeatureStore for this repository.
|
|
1735
|
+
>>> fs.setup()
|
|
1736
|
+
True
|
|
1737
|
+
|
|
1738
|
+
# Create DataSource using DataFrame 'df' with name 'admissions'.
|
|
1739
|
+
>>> ds = DataSource('admissions', source=df)
|
|
1740
|
+
# Apply the DataSource to FeatureStore 'vfs_v1'.
|
|
1741
|
+
>>> fs.apply(ds)
|
|
1742
|
+
True
|
|
1743
|
+
|
|
1744
|
+
# Get the DataSource 'admissions' from repo 'vfs_v1'
|
|
1745
|
+
>>> ds = fs.get_data_source('admissions')
|
|
1746
|
+
>>> ds
|
|
1747
|
+
DataSource(name=admissions)
|
|
1748
|
+
"""
|
|
1749
|
+
self._logger.info(f"Getting data source '{name}' from repository '{self.__repo}', data_domain '{self.__data_domain}'")
|
|
1750
|
+
|
|
1751
|
+
argument_validation_params = []
|
|
1752
|
+
argument_validation_params.append(["name", name, False, (str), True])
|
|
1753
|
+
|
|
1754
|
+
# Validate argument types
|
|
1755
|
+
_Validators._validate_function_arguments(argument_validation_params)
|
|
1756
|
+
|
|
1757
|
+
df = self.__get_data_source_df()
|
|
1758
|
+
df = df[(df['name'] == name) &
|
|
1759
|
+
(df['data_domain'] == self.__data_domain)]
|
|
1760
|
+
|
|
1761
|
+
# Check if a data source with that name exists or not. If not, raise error.
|
|
1762
|
+
if df.shape[0] == 0:
|
|
1763
|
+
res = _FSUtils._get_data_domains(self.__repo, name, 'data_source')
|
|
1764
|
+
if res:
|
|
1765
|
+
msg_code = MessageCodes.EFS_OBJECT_IN_OTHER_DOMAIN
|
|
1766
|
+
error_msg = Messages.get_message(msg_code, "DataSource", "name '{}'".format(name),
|
|
1767
|
+
self.__data_domain, res)
|
|
1768
|
+
else:
|
|
1769
|
+
msg_code = MessageCodes.EFS_OBJECT_NOT_EXIST
|
|
1770
|
+
error_msg = Messages.get_message(msg_code, "DataSource", "name '{}'".format(name),
|
|
1771
|
+
self.__data_domain)
|
|
1772
|
+
raise TeradataMlException(error_msg, msg_code)
|
|
1773
|
+
|
|
1774
|
+
return DataSource._from_df(df)
|
|
1775
|
+
|
|
1776
|
+
def get_feature_process(self, object, entity=None, features=None, description=None):
|
|
1777
|
+
"""
|
|
1778
|
+
DESCRIPTION:
|
|
1779
|
+
Retrieves the FeatureProcess object.
|
|
1780
|
+
|
|
1781
|
+
PARAMETERS:
|
|
1782
|
+
object:
|
|
1783
|
+
Required Argument.
|
|
1784
|
+
Specifies the source to ingest feature values. It can be one of the following:
|
|
1785
|
+
* teradataml DataFrame
|
|
1786
|
+
* Feature group
|
|
1787
|
+
* Process id
|
|
1788
|
+
Notes:
|
|
1789
|
+
* If "object" is of type teradataml DataFrame, then "entity"
|
|
1790
|
+
and "features" should be provided.
|
|
1791
|
+
* If "object" is of type str, then it is considered as
|
|
1792
|
+
the process id of an existing FeatureProcess and reruns the
|
|
1793
|
+
process. Entity and features are taken from the existing
|
|
1794
|
+
feature process. Hence, the arguments "entity" and "features"
|
|
1795
|
+
are ignored.
|
|
1796
|
+
* If "object" is of type FeatureGroup, then entity and features
|
|
1797
|
+
are taken from the FeatureGroup. Hence, the arguments "entity"
|
|
1798
|
+
and "features" are ignored.
|
|
1799
|
+
Types: DataFrame or FeatureGroup or str
|
|
1800
|
+
|
|
1801
|
+
entity:
|
|
1802
|
+
Optional Argument.
|
|
1803
|
+
Specifies Entity for DataFrame.
|
|
1804
|
+
Notes:
|
|
1805
|
+
* Ignored when "object" is of type FeatureGroup or str.
|
|
1806
|
+
* If a string or list of strings is provided, then "object" should
|
|
1807
|
+
have these columns in it.
|
|
1808
|
+
* If Entity object is provided, then associated columns in Entity
|
|
1809
|
+
object should be present in DataFrame.
|
|
1810
|
+
Types: Entity or str or list of str
|
|
1811
|
+
|
|
1812
|
+
features:
|
|
1813
|
+
Optional Argument.
|
|
1814
|
+
Specifies list of features to be considered in feature process. Feature
|
|
1815
|
+
ingestion takes place only for these features.
|
|
1816
|
+
Note:
|
|
1817
|
+
* Ignored when "object" is of type FeatureGroup or str.
|
|
1818
|
+
Types: Feature or list of Feature or str or list of str.
|
|
1819
|
+
|
|
1820
|
+
description:
|
|
1821
|
+
Optional Argument.
|
|
1822
|
+
Specifies description for the FeatureProcess.
|
|
1823
|
+
Types: str
|
|
1824
|
+
|
|
1825
|
+
RETURNS:
|
|
1826
|
+
FeatureProcess
|
|
1827
|
+
|
|
1828
|
+
RAISES:
|
|
1829
|
+
None.
|
|
1830
|
+
|
|
1831
|
+
EXAMPLES:
|
|
1832
|
+
>>> from teradataml import FeatureStore
|
|
1833
|
+
>>> fs = FeatureStore('vfs_v1')
|
|
1834
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
1835
|
+
# Setup FeatureStore for this repository.
|
|
1836
|
+
>>> fs.setup()
|
|
1837
|
+
True
|
|
1838
|
+
|
|
1839
|
+
# Load the admissions data to Vantage.
|
|
1840
|
+
>>> from teradataml import DataFrame, load_example_data
|
|
1841
|
+
>>> load_example_data("dataframe", "admissions_train")
|
|
1842
|
+
>>> admission_df = DataFrame("admissions_train")
|
|
1843
|
+
|
|
1844
|
+
>>> fp = FeatureProcess(repo='vfs_v1',
|
|
1845
|
+
... data_domain='d1',
|
|
1846
|
+
... object=admission_df,
|
|
1847
|
+
... entity='id',
|
|
1848
|
+
... features=['stats', 'programming', 'admitted'])
|
|
1849
|
+
>>> fp.run()
|
|
1850
|
+
Process '0d365f08-66b0-11f0-88ff-b0dcef8381ea' started.
|
|
1851
|
+
Process '0d365f08-66b0-11f0-88ff-b0dcef8381ea' completed.
|
|
1852
|
+
|
|
1853
|
+
>>> fs.get_feature_process(object='0d365f08-66b0-11f0-88ff-b0dcef8381ea')
|
|
1854
|
+
FeatureProcess(repo=vfs_v1, data_domain=d1, process_id=0d365f08-66b0-11f0-88ff-b0dcef8381ea)
|
|
1855
|
+
"""
|
|
1856
|
+
self._logger.info(f"Getting FeatureProcess from repository '{self.__repo}', data_domain '{self.__data_domain}', object: {object}, entity: {entity}, features: {features}")
|
|
1857
|
+
|
|
1858
|
+
return FeatureProcess(repo=self.__repo,
|
|
1859
|
+
data_domain=self.__data_domain,
|
|
1860
|
+
object=object,
|
|
1861
|
+
entity=entity,
|
|
1862
|
+
features=features,
|
|
1863
|
+
description=description
|
|
1864
|
+
)
|
|
1865
|
+
|
|
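The Notes in the docstring above also allow a FeatureGroup or a teradataml DataFrame as "object", while the EXAMPLES only show the process-id form. A minimal illustrative sketch (not part of the diffed package source), assuming a FeatureGroup named 'sales' has already been applied and that 'admission_df' from the example above exists:
>>> fg = fs.get_feature_group('sales')
>>> fp_from_fg = fs.get_feature_process(object=fg)            # entity and features come from the group
>>> fp_from_df = fs.get_feature_process(object=admission_df,  # DataFrame form needs entity and features
...                                     entity='id',
...                                     features=['gpa'])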
1866
|
+
def get_feature_catalog(self):
|
|
1867
|
+
"""
|
|
1868
|
+
DESCRIPTION:
|
|
1869
|
+
Retrieves FeatureCatalog based on the feature store's repo and data domain.
|
|
1870
|
+
|
|
1871
|
+
PARAMETERS:
|
|
1872
|
+
None.
|
|
1873
|
+
|
|
1874
|
+
RETURNS:
|
|
1875
|
+
FeatureCatalog
|
|
1876
|
+
|
|
1877
|
+
RAISES:
|
|
1878
|
+
None.
|
|
1879
|
+
|
|
1880
|
+
EXAMPLES:
|
|
1881
|
+
>>> from teradataml import FeatureStore
|
|
1882
|
+
# Create FeatureStore for repo 'vfs_v1'.
|
|
1883
|
+
>>> fs = FeatureStore('vfs_v1')
|
|
1884
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
1885
|
+
# Setup FeatureStore for this repository.
|
|
1886
|
+
>>> fs.setup()
|
|
1887
|
+
True
|
|
1888
|
+
|
|
1889
|
+
# Load the sales data to Vantage.
|
|
1890
|
+
>>> from teradataml import load_example_data
|
|
1891
|
+
>>> load_example_data("dataframe", "sales")
|
|
1892
|
+
>>> df = DataFrame("sales")
|
|
1893
|
+
|
|
1894
|
+
# Create a feature process.
|
|
1895
|
+
>>> from teradataml import FeatureProcess
|
|
1896
|
+
>>> fp = FeatureProcess(repo="vfs_v1",
|
|
1897
|
+
... data_domain='sales',
|
|
1898
|
+
... object=df,
|
|
1899
|
+
... entity="accounts",
|
|
1900
|
+
... features=["Jan", "Feb", "Mar", "Apr"])
|
|
1901
|
+
>>> fp.run()
|
|
1902
|
+
Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' started.
|
|
1903
|
+
Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' completed.
|
|
1904
|
+
|
|
1905
|
+
# Get FeatureCatalog from FeatureStore.
|
|
1906
|
+
>>> fs.get_feature_catalog()
|
|
1907
|
+
FeatureCatalog(repo=vfs_v1, data_domain=sales)
|
|
1908
|
+
"""
|
|
1909
|
+
self._logger.info(f"Getting FeatureCatalog for repository '{self.__repo}', data_domain '{self.__data_domain}'")
|
|
1910
|
+
result = FeatureCatalog(repo=self.__repo, data_domain=self.__data_domain)
|
|
1911
|
+
self._logger.debug(f"Created FeatureCatalog object: {result}")
|
|
1912
|
+
return result
|
|
1913
|
+
|
|
1914
|
+
def get_data_domain(self):
|
|
1915
|
+
"""
|
|
1916
|
+
DESCRIPTION:
|
|
1917
|
+
Retrieves DataDomain based on the feature store's repo and data domain.
|
|
1918
|
+
|
|
1919
|
+
PARAMETERS:
|
|
1920
|
+
None
|
|
1921
|
+
|
|
1922
|
+
RETURNS:
|
|
1923
|
+
DataDomain
|
|
1924
|
+
|
|
1925
|
+
RAISES:
|
|
1926
|
+
None.
|
|
1927
|
+
|
|
1928
|
+
EXAMPLES:
|
|
1929
|
+
>>> from teradataml import FeatureStore
|
|
1930
|
+
# Create FeatureStore for repo 'vfs_v1'.
|
|
1931
|
+
>>> fs = FeatureStore('vfs_v1', data_domain='sales')
|
|
1932
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
1933
|
+
# Setup FeatureStore for this repository.
|
|
1934
|
+
>>> fs.setup()
|
|
1935
|
+
|
|
1936
|
+
# Get DataDomain from FeatureStore.
|
|
1937
|
+
>>> fs.get_data_domain()
|
|
1938
|
+
DataDomain(repo=vfs_v1, data_domain=sales)
|
|
1939
|
+
"""
|
|
1940
|
+
self._logger.info(f"Getting DataDomain for repository '{self.__repo}', data_domain '{self.__data_domain}'")
|
|
1941
|
+
result = DataDomain(repo=self.__repo, data_domain=self.__data_domain)
|
|
1942
|
+
self._logger.debug(f"Created DataDomain object: {result}")
|
|
1943
|
+
return result
|
|
1944
|
+
|
|
1945
|
+
def get_dataset_catalog(self):
|
|
1946
|
+
"""
|
|
1947
|
+
DESCRIPTION:
|
|
1948
|
+
Retrieves DatasetCatalog based on the feature store's repo and data domain.
|
|
1949
|
+
|
|
1950
|
+
PARAMETERS:
|
|
1951
|
+
None.
|
|
1952
|
+
|
|
1953
|
+
RETURNS:
|
|
1954
|
+
DatasetCatalog
|
|
1955
|
+
|
|
1956
|
+
RAISES:
|
|
1957
|
+
None.
|
|
1958
|
+
|
|
1959
|
+
EXAMPLES:
|
|
1960
|
+
>>> from teradataml import FeatureStore
|
|
1961
|
+
# Create FeatureStore for repo 'vfs_v1'.
|
|
1962
|
+
>>> fs = FeatureStore('vfs_v1', data_domain='sales')
|
|
1963
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
1964
|
+
# Setup FeatureStore for this repository.
|
|
1965
|
+
>>> fs.setup()
|
|
1966
|
+
|
|
1967
|
+
# Load the sales data to Vantage.
|
|
1968
|
+
>>> from teradataml import load_example_data
|
|
1969
|
+
>>> load_example_data("dataframe", "sales")
|
|
1970
|
+
>>> df = DataFrame("sales")
|
|
1971
|
+
|
|
1972
|
+
# Create a feature process.
|
|
1973
|
+
>>> from teradataml import FeatureProcess
|
|
1974
|
+
>>> fp = FeatureProcess(repo="vfs_v1",
|
|
1975
|
+
... data_domain='sales',
|
|
1976
|
+
... object=df,
|
|
1977
|
+
... entity="accounts",
|
|
1978
|
+
... features=["Jan", "Feb", "Mar", "Apr"])
|
|
1979
|
+
>>> fp.run()
|
|
1980
|
+
Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' started.
|
|
1981
|
+
Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' completed.
|
|
1982
|
+
True
|
|
1983
|
+
|
|
1984
|
+
# Build the dataset.
|
|
1985
|
+
>>> dc = DatasetCatalog(repo='vfs_v1', data_domain='sales')
|
|
1986
|
+
>>> dataset = dc.build_dataset(entity='accounts',
|
|
1987
|
+
... selected_features = {
|
|
1988
|
+
... 'Jan': fp.process_id,
|
|
1989
|
+
... 'Feb': fp.process_id},
|
|
1990
|
+
... view_name='ds_jan_feb',
|
|
1991
|
+
... description='Dataset with Jan and Feb features')
|
|
1992
|
+
|
|
1993
|
+
# Get DatasetCatalog from FeatureStore.
|
|
1994
|
+
>>> fs.get_dataset_catalog()
|
|
1995
|
+
DatasetCatalog(repo=vfs_v1, data_domain=sales)
|
|
1996
|
+
"""
|
|
1997
|
+
self._logger.info(f"Getting DatasetCatalog for repository '{self.__repo}', data_domain '{self.__data_domain}'")
|
|
1998
|
+
result = DatasetCatalog(repo=self.__repo, data_domain=self.__data_domain)
|
|
1999
|
+
self._logger.debug(f"Created DatasetCatalog object: {result}")
|
|
2000
|
+
return result
|
|
2001
|
+
|
|
2002
|
+
def set_features_inactive(self, names):
|
|
2003
|
+
"""
|
|
2004
|
+
DESCRIPTION:
|
|
2005
|
+
Mark the feature status as 'inactive'. Note that inactive features are
|
|
2006
|
+
not available for any further processing. Set the status as 'active' with
|
|
2007
|
+
"set_features_active()" method.
|
|
2008
|
+
|
|
2009
|
+
PARAMETERS:
|
|
2010
|
+
names:
|
|
2011
|
+
Required Argument.
|
|
2012
|
+
Specifies the name(s) of the feature(s).
|
|
2013
|
+
Types: str OR list of str
|
|
2014
|
+
|
|
2015
|
+
RETURNS:
|
|
2016
|
+
bool
|
|
2017
|
+
|
|
2018
|
+
RAISES:
|
|
2019
|
+
TeradataMLException
|
|
2020
|
+
|
|
2021
|
+
EXAMPLES:
|
|
2022
|
+
>>> from teradataml import DataFrame, DataSource, FeatureStore, load_example_data
|
|
2023
|
+
# Create DataFrame on admissions data.
|
|
2024
|
+
>>> load_example_data("dataframe", "admissions_train")
|
|
2025
|
+
>>> df = DataFrame("admissions_train")
|
|
2026
|
+
>>> df
|
|
2027
|
+
masters gpa stats programming admitted
|
|
2028
|
+
id
|
|
2029
|
+
34 yes 3.85 Advanced Beginner 0
|
|
2030
|
+
32 yes 3.46 Advanced Beginner 0
|
|
2031
|
+
11 no 3.13 Advanced Advanced 1
|
|
2032
|
+
40 yes 3.95 Novice Beginner 0
|
|
2033
|
+
38 yes 2.65 Advanced Beginner 1
|
|
2034
|
+
36 no 3.00 Advanced Novice 0
|
|
2035
|
+
7 yes 2.33 Novice Novice 1
|
|
2036
|
+
26 yes 3.57 Advanced Advanced 1
|
|
2037
|
+
19 yes 1.98 Advanced Advanced 0
|
|
2038
|
+
13 no 4.00 Advanced Novice 1
|
|
2039
|
+
|
|
2040
|
+
# Create FeatureStore for repo 'vfs_v1'.
|
|
2041
|
+
>>> fs = FeatureStore("vfs_v1")
|
|
2042
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
2043
|
+
# Setup FeatureStore for this repository.
|
|
2044
|
+
>>> fs.setup()
|
|
2045
|
+
True
|
|
2046
|
+
|
|
2047
|
+
# Create FeatureGroup from DataFrame df.
|
|
2048
|
+
>>> fg = FeatureGroup.from_DataFrame(name='admissions', df=df, entity_columns='id')
|
|
2049
|
+
# Apply the FeatureGroup to FeatureStore 'vfs_v1'.
|
|
2050
|
+
>>> fs.apply(fg)
|
|
2051
|
+
True
|
|
2052
|
+
|
|
2053
|
+
# Get FeatureGroup 'admissions' from FeatureStore.
|
|
2054
|
+
>>> fg = fs.get_feature_group('admissions')
|
|
2055
|
+
>>> fg
|
|
2056
|
+
FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=programming), Feature(name=admitted), Feature(name=stats), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
|
|
2057
|
+
|
|
2058
|
+
# Example 1: Set the Feature 'programming' inactive.
|
|
2059
|
+
# Set the Feature 'programming' inactive.
|
|
2060
|
+
>>> fs.set_features_inactive('programming')
|
|
2061
|
+
True
|
|
2062
|
+
|
|
2063
|
+
# Get FeatureGroup again after setting feature inactive.
|
|
2064
|
+
>>> fg = fs.get_feature_group('admissions')
|
|
2065
|
+
>>> fg
|
|
2066
|
+
FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=stats), Feature(name=admitted), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
|
|
2067
|
+
|
|
2068
|
+
"""
|
|
2069
|
+
self._logger.info(f"Setting features inactive: {names} in repository '{self.__repo}', data_domain '{self.__data_domain}'")
|
|
2070
|
+
return self.__set_active_inactive_features(names, active=False)
|
|
2071
|
+
|
|
2072
|
+
def set_features_active(self, names):
|
|
2073
|
+
"""
|
|
2074
|
+
DESCRIPTION:
|
|
2075
|
+
Mark the feature status as active. Set the status as 'inactive' with
|
|
2076
|
+
"set_features_inactive()" method. Note that, inactive features are
|
|
2077
|
+
not available for any further processing.
|
|
2078
|
+
|
|
2079
|
+
PARAMETERS:
|
|
2080
|
+
names:
|
|
2081
|
+
Required Argument.
|
|
2082
|
+
Specifies the name(s) of the feature(s).
|
|
2083
|
+
Types: str OR list of str
|
|
2084
|
+
|
|
2085
|
+
RETURNS:
|
|
2086
|
+
bool
|
|
2087
|
+
|
|
2088
|
+
RAISES:
|
|
2089
|
+
TeradataMLException
|
|
2090
|
+
|
|
2091
|
+
EXAMPLES:
|
|
2092
|
+
>>> from teradataml import DataFrame, DataSource, FeatureStore, load_example_data
|
|
2093
|
+
# Create DataFrame on admissions data.
|
|
2094
|
+
>>> load_example_data("dataframe", "admissions_train")
|
|
2095
|
+
>>> df = DataFrame("admissions_train")
|
|
2096
|
+
>>> df
|
|
2097
|
+
masters gpa stats programming admitted
|
|
2098
|
+
id
|
|
2099
|
+
34 yes 3.85 Advanced Beginner 0
|
|
2100
|
+
32 yes 3.46 Advanced Beginner 0
|
|
2101
|
+
11 no 3.13 Advanced Advanced 1
|
|
2102
|
+
40 yes 3.95 Novice Beginner 0
|
|
2103
|
+
38 yes 2.65 Advanced Beginner 1
|
|
2104
|
+
36 no 3.00 Advanced Novice 0
|
|
2105
|
+
7 yes 2.33 Novice Novice 1
|
|
2106
|
+
26 yes 3.57 Advanced Advanced 1
|
|
2107
|
+
19 yes 1.98 Advanced Advanced 0
|
|
2108
|
+
13 no 4.00 Advanced Novice 1
|
|
2109
|
+
|
|
2110
|
+
# Create FeatureStore for repo 'vfs_v1'.
|
|
2111
|
+
>>> fs = FeatureStore("vfs_v1")
|
|
2112
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
2113
|
+
# Setup FeatureStore for this repository.
|
|
2114
|
+
>>> fs.setup()
|
|
2115
|
+
True
|
|
2116
|
+
|
|
2117
|
+
# Create FeatureGroup from DataFrame df.
|
|
2118
|
+
>>> fg = FeatureGroup.from_DataFrame(name='admissions', df=df, entity_columns='id')
|
|
2119
|
+
# Apply the FeatureGroup to FeatureStore 'vfs_v1'.
|
|
2120
|
+
>>> fs.apply(fg)
|
|
2121
|
+
True
|
|
2122
|
+
|
|
2123
|
+
# Get FeatureGroup 'admissions' from FeatureStore.
|
|
2124
|
+
>>> fg = fs.get_feature_group('admissions')
|
|
2125
|
+
>>> fg
|
|
2126
|
+
FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=programming), Feature(name=admitted), Feature(name=stats), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
|
|
2127
|
+
|
|
2128
|
+
# Example 1: Set the Feature 'programming' inactive.
|
|
2129
|
+
# Set the Feature 'programming' inactive.
|
|
2130
|
+
>>> fs.set_features_inactive('programming')
|
|
2131
|
+
True
|
|
2132
|
+
|
|
2133
|
+
# Get FeatureGroup again after setting feature inactive.
|
|
2134
|
+
>>> fg = fs.get_feature_group('admissions')
|
|
2135
|
+
>>> fg
|
|
2136
|
+
FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=stats), Feature(name=admitted), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
|
|
2137
|
+
|
|
2138
|
+
# Mark Feature 'programming' from 'inactive' to 'active'.
|
|
2139
|
+
>>> fs.set_features_active('programming')
|
|
2140
|
+
# Get FeatureGroup again after setting feature active.
|
|
2141
|
+
>>> fg = fs.get_feature_group('admissions')
|
|
2142
|
+
>>> fg
|
|
2143
|
+
FeatureGroup(admissions, features=[Feature(name=masters), Feature(name=programming), Feature(name=admitted), Feature(name=stats), Feature(name=gpa)], entity=Entity(name=admissions), data_source=DataSource(name=admissions))
|
|
2144
|
+
>>>
|
|
2145
|
+
"""
|
|
2146
|
+
self._logger.info(f"Setting features active: {names} in repository '{self.__repo}', data_domain '{self.__data_domain}'")
|
|
2147
|
+
return self.__set_active_inactive_features(names, active=True)
|
|
2148
|
+
|
|
2149
|
+
def __set_active_inactive_features(self, names, active):
|
|
2150
|
+
"""
|
|
2151
|
+
DESCRIPTION:
|
|
2152
|
+
Internal function to either activate or deactivate features.
|
|
2153
|
+
|
|
2154
|
+
PARAMETERS:
|
|
2155
|
+
names:
|
|
2156
|
+
Required Argument.
|
|
2157
|
+
Specifies the name(s) of the feature(s).
|
|
2158
|
+
Types: str OR list of str
|
|
2159
|
+
|
|
2160
|
+
RETURNS:
|
|
2161
|
+
bool
|
|
2162
|
+
|
|
2163
|
+
RAISES:
|
|
2164
|
+
TeradataMLException
|
|
2165
|
+
|
|
2166
|
+
EXAMPLES:
|
|
2167
|
+
# Example 1: Set the feature 'feature1' inactive in the repo
|
|
2168
|
+
# 'vfs_v1'.
|
|
2169
|
+
>>> from teradataml import FeatureStore
|
|
2170
|
+
>>> fs = FeatureStore('vfs_v1')
|
|
2171
|
+
>>> fs.__set_active_inactive_features(names='feature1', active=False)
|
|
2172
|
+
True
|
|
2173
|
+
>>>
|
|
2174
|
+
"""
|
|
2175
|
+
names = UtilFuncs._as_list(names)
|
|
2176
|
+
|
|
2177
|
+
argument_validation_params = []
|
|
2178
|
+
argument_validation_params.append(["names", names, False, (str, list), True])
|
|
2179
|
+
|
|
2180
|
+
# Validate argument types
|
|
2181
|
+
_Validators._validate_function_arguments(argument_validation_params)
|
|
2182
|
+
|
|
2183
|
+
status = FeatureStatus.ACTIVE.name if active else FeatureStatus.INACTIVE.name
|
|
2184
|
+
|
|
2185
|
+
is_set = True
|
|
2186
|
+
if status == FeatureStatus.INACTIVE.name:
|
|
2187
|
+
# Get the joined df of '_efs_features' and '_efs_features_metadata'.
|
|
2188
|
+
feature_info_df = self.__get_feature_info_df()
|
|
2189
|
+
metadata_features = [feature.name for feature in feature_info_df.itertuples()]
|
|
2190
|
+
|
|
2191
|
+
# Split the user-provided feature names into those which are
|
|
2192
|
+
# present in the catalog and those which are not.
|
|
2193
|
+
catalog_features = []
|
|
2194
|
+
non_catalog_features = []
|
|
2195
|
+
for name in names:
|
|
2196
|
+
if name in metadata_features:
|
|
2197
|
+
catalog_features.append(name)
|
|
2198
|
+
else:
|
|
2199
|
+
non_catalog_features.append(name)
|
|
2200
|
+
|
|
2201
|
+
# If all the user-provided names are present in the catalog.
|
|
2202
|
+
if len(catalog_features) == len(names):
|
|
2203
|
+
print("Feature(s) '{}' entries exists in feature catalog, cannot be set "
|
|
2204
|
+
"to inactive.".format(", ".join(catalog_features)))
|
|
2205
|
+
return False
|
|
2206
|
+
# If some of the user-provided features are present in the catalog.
|
|
2207
|
+
elif len(catalog_features) > 0:
|
|
2208
|
+
print("Feature(s) '{}' entries exists in feature catalog, cannot be set "
|
|
2209
|
+
"to inactive.".format(", ".join(catalog_features)))
|
|
2210
|
+
is_set = is_set and False
|
|
2211
|
+
|
|
2212
|
+
# Keep only the feature names which are not present in the catalog.
|
|
2213
|
+
names = non_catalog_features
|
|
2214
|
+
|
|
2215
|
+
_update_data(table_name=self.__table_names['feature'],
|
|
2216
|
+
schema_name=self.__repo,
|
|
2217
|
+
update_columns_values={"status": status},
|
|
2218
|
+
update_conditions={"name": names}
|
|
2219
|
+
)
|
|
2220
|
+
|
|
2221
|
+
return is_set
|
|
2222
|
+
|
|
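Per the logic above, features that already have entries in the feature catalog cannot be switched to inactive; for a mixed list, only the non-catalog features are updated and the call returns False. A minimal illustrative sketch (not part of the diffed package source) with hypothetical feature names:
>>> fs.set_features_inactive(['gpa', 'Jan'])    # assume 'Jan' already has catalog entries
Feature(s) 'Jan' entries exist in feature catalog, cannot be set to inactive.
False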
2223
|
+
def apply(self, object):
|
|
2224
|
+
"""
|
|
2225
|
+
DESCRIPTION:
|
|
2226
|
+
Register objects to the repository.
|
|
2227
|
+
Note:
|
|
2228
|
+
* If the object is an Entity or FeatureGroup and the same entity or feature group is already
|
|
2229
|
+
registered in the repository, it is not updated.
|
|
2230
|
+
* If the entity or feature group is associated with any feature process, an error is raised
|
|
2231
|
+
while modifying these objects.
|
|
2232
|
+
|
|
2233
|
+
PARAMETERS:
|
|
2234
|
+
object:
|
|
2235
|
+
Required Argument.
|
|
2236
|
+
Specifies the object to register with the repository.
|
|
2237
|
+
Types: Feature OR DataSource OR Entity OR FeatureGroup.
|
|
2238
|
+
|
|
2239
|
+
RETURNS:
|
|
2240
|
+
bool.
|
|
2241
|
+
|
|
2242
|
+
RAISES:
|
|
2243
|
+
TeradataMLException
|
|
2244
|
+
|
|
2245
|
+
EXAMPLES:
|
|
2246
|
+
>>> from teradataml import FeatureStore, DataFrame, load_example_data
|
|
2247
|
+
# Create DataFrame on sales data.
|
|
2248
|
+
>>> load_example_data('dataframe', ['sales'])
|
|
2249
|
+
>>> df = DataFrame("sales")
|
|
2250
|
+
|
|
2251
|
+
# Create FeatureStore for repo 'vfs_v1'.
|
|
2252
|
+
>>> fs = FeatureStore("vfs_v1")
|
|
2253
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
2254
|
+
# Setup FeatureStore for this repository.
|
|
2255
|
+
>>> fs.setup()
|
|
2256
|
+
True
|
|
2257
|
+
|
|
2258
|
+
# Example 1: create a Feature for column 'Feb' from 'sales' DataFrame
|
|
2259
|
+
# and register with repo 'vfs_v1'.
|
|
2260
|
+
>>> # Create Feature.
|
|
2261
|
+
>>> from teradataml import Feature
|
|
2262
|
+
>>> feature = Feature('sales:Feb', df.Feb)
|
|
2263
|
+
>>> # Register the above Feature with repo.
|
|
2264
|
+
>>> fs.apply(feature)
|
|
2265
|
+
True
|
|
2266
|
+
>>>
|
|
2267
|
+
|
|
2268
|
+
# Example 2: create Entity for 'sales' DataFrame and register
|
|
2269
|
+
# with repo 'vfs_v1'.
|
|
2270
|
+
>>> # Create Entity.
|
|
2271
|
+
>>> from teradataml import Entity
|
|
2272
|
+
>>> entity = Entity('sales:accounts', df.accounts)
|
|
2273
|
+
>>> # Register the above Entity with repo.
|
|
2274
|
+
>>> fs.apply(entity)
|
|
2275
|
+
True
|
|
2276
|
+
>>>
|
|
2277
|
+
|
|
2278
|
+
# Example 3: create DataSource for 'sales' DataFrame and register
|
|
2279
|
+
# with repo 'vfs_v1'.
|
|
2280
|
+
>>> # Create DataSource.
|
|
2281
|
+
>>> from teradataml import DataSource
|
|
2282
|
+
>>> ds = DataSource('Sales_Data', df)
|
|
2283
|
+
>>> # Register the above DataSource with repo.
|
|
2284
|
+
>>> fs.apply(ds)
|
|
2285
|
+
True
|
|
2286
|
+
>>>
|
|
2287
|
+
|
|
2288
|
+
# Example 4: create FeatureGroup with all the objects
|
|
2289
|
+
# created in above examples and register with
|
|
2290
|
+
# repo 'vfs_v1'.
|
|
2291
|
+
>>> # Create FeatureGroup.
|
|
2292
|
+
>>> from teradataml import FeatureGroup
|
|
2293
|
+
>>> fg = FeatureGroup('Sales',
|
|
2294
|
+
... features=feature,
|
|
2295
|
+
... entity=entity,
|
|
2296
|
+
... data_source=ds)
|
|
2297
|
+
>>> # Register the above FeatureGroup with repo.
|
|
2298
|
+
>>> fs.apply(fg)
|
|
2299
|
+
True
|
|
2300
|
+
"""
|
|
2301
|
+
self._logger.info(f"Applying object to FeatureStore repository '{self.__repo}', data_domain '{self.__data_domain}', object type: {type(object).__name__}")
|
|
2302
|
+
|
|
2303
|
+
argument_validation_params = []
|
|
2304
|
+
argument_validation_params.append(["name", object, False, (Feature, Entity, DataSource, FeatureGroup)])
|
|
2305
|
+
|
|
2306
|
+
# Validate argument types
|
|
2307
|
+
_Validators._validate_function_arguments(argument_validation_params)
|
|
2308
|
+
|
|
2309
|
+
result = object.publish(self.__repo, self.__data_domain)
|
|
2310
|
+
self._logger.debug(f"Successfully applied {type(object).__name__} to repository, result: {result}")
|
|
2311
|
+
return result
|
|
2312
|
+
|
|
2313
|
+
def get_data(self, process_id=None, entity=None, features=None,
|
|
2314
|
+
dataset_name=None, as_of=None, include_historic_records=False):
|
|
2315
|
+
"""
|
|
2316
|
+
DESCRIPTION:
|
|
2317
|
+
Returns teradataml DataFrame which has entities and feature values.
|
|
2318
|
+
The method generates the dataset from one of the following:
|
|
2319
|
+
* process_id
|
|
2320
|
+
* entity and features
|
|
2321
|
+
* dataset_name
|
|
2322
|
+
|
|
2323
|
+
PARAMETERS:
|
|
2324
|
+
process_id:
|
|
2325
|
+
Optional Argument.
|
|
2326
|
+
Either "process_id", "entity" and "features", "dataset_name" is mandatory.
|
|
2327
|
+
Specifies the process id of an existing feature process.
|
|
2328
|
+
Types: str
|
|
2329
|
+
|
|
2330
|
+
entity:
|
|
2331
|
+
Optional Argument.
|
|
2332
|
+
Specifies the name of the Entity or Object of Entity
|
|
2333
|
+
to be considered in the dataset.
|
|
2334
|
+
Types: str or Entity.
|
|
2335
|
+
|
|
2336
|
+
features:
|
|
2337
|
+
Optional Argument.
|
|
2338
|
+
Specifies the names of Features and the corresponding feature version
|
|
2339
|
+
to be included in the dataset.
|
|
2340
|
+
Notes:
|
|
2341
|
+
* Key is the name of the feature and value is the version of the
|
|
2342
|
+
feature.
|
|
2343
|
+
* Look at FeatureCatalog.list_feature_versions() to get the list of
|
|
2344
|
+
features and their versions.
|
|
2345
|
+
Types: dict
|
|
2346
|
+
|
|
2347
|
+
dataset_name:
|
|
2348
|
+
Optional Argument.
|
|
2349
|
+
Specifies the dataset name.
|
|
2350
|
+
Types: str
|
|
2351
|
+
|
|
2352
|
+
as_of:
|
|
2353
|
+
Optional Argument.
|
|
2354
|
+
Specifies the time to retrieve the Feature Values instead of
|
|
2355
|
+
retrieving the latest values.
|
|
2356
|
+
Notes:
|
|
2357
|
+
* Applicable only when "process_id" is passed to the function.
|
|
2358
|
+
* Ignored when "dataset_name" is passed.
|
|
2359
|
+
Types: str or datetime.datetime
|
|
2360
|
+
|
|
2361
|
+
include_historic_records:
|
|
2362
|
+
Optional Argument.
|
|
2363
|
+
Specifies whether to include historic data in the dataset.
|
|
2364
|
+
Note:
|
|
2365
|
+
* If "as_of" is specified, then the "include_historic_records" argument is ignored.
|
|
2366
|
+
Default Value: False.
|
|
2367
|
+
Types: bool.
|
|
2368
|
+
|
|
2369
|
+
|
|
2370
|
+
RETURNS:
|
|
2371
|
+
teradataml DataFrame.
|
|
2372
|
+
|
|
2373
|
+
RAISES:
|
|
2374
|
+
TeradataMLException
|
|
2375
|
+
|
|
2376
|
+
EXAMPLES:
|
|
2377
|
+
>>> from teradataml import DataFrame, FeatureStore, load_example_data
|
|
2378
|
+
# Create DataFrame on sales data.
|
|
2379
|
+
>>> load_example_data("dataframe", "sales")
|
|
2380
|
+
>>> df = DataFrame("sales")
|
|
2381
|
+
>>> df
|
|
2382
|
+
Feb Jan Mar Apr datetime
|
|
2383
|
+
accounts
|
|
2384
|
+
Orange Inc 210.0 NaN NaN 250.0 04/01/2017
|
|
2385
|
+
Jones LLC 200.0 150.0 140.0 180.0 04/01/2017
|
|
2386
|
+
Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
|
|
2387
|
+
Alpha Co 210.0 200.0 215.0 250.0 04/01/2017
|
|
2388
|
+
Yellow Inc 90.0 NaN NaN NaN 04/01/2017
|
|
2389
|
+
|
|
2390
|
+
>>> repo = 'vfs_v1'
|
|
2391
|
+
>>> data_domain = 'sales'
|
|
2392
|
+
>>> fs = FeatureStore(repo=repo, data_domain=data_domain)
|
|
2393
|
+
FeatureStore is ready to use.
|
|
2394
|
+
|
|
2395
|
+
# Example 1: Get the data from process_id.
|
|
2396
|
+
>>> fp = FeatureProcess(repo=repo,
|
|
2397
|
+
... data_domain=data_domain,
|
|
2398
|
+
... object=df,
|
|
2399
|
+
... entity='accounts',
|
|
2400
|
+
... features=['Jan', 'Feb'])
|
|
2401
|
+
>>> fp.run()
|
|
2402
|
+
Process '1e9e8d64-6851-11f0-99c5-a30631e77953' started.
|
|
2403
|
+
Process '1e9e8d64-6851-11f0-99c5-a30631e77953' completed.
|
|
2404
|
+
True
|
|
2405
|
+
|
|
2406
|
+
>>> fs.get_data(process_id=fp.process_id)
|
|
2407
|
+
accounts Feb Jan
|
|
2408
|
+
0 Alpha Co 210.0 200.0
|
|
2409
|
+
1 Blue Inc 90.0 50.0
|
|
2410
|
+
2 Jones LLC 200.0 150.0
|
|
2411
|
+
3 Orange Inc 210.0 NaN
|
|
2412
|
+
4 Yellow Inc 90.0 NaN
|
|
2413
|
+
5 Red Inc 200.0 150.0
|
|
2414
|
+
|
|
2415
|
+
# Example 2: Get the data from entity and features.
|
|
2416
|
+
>>> fs.get_data(entity='accounts', features={'Jan': fp.process_id})
|
|
2417
|
+
accounts Jan
|
|
2418
|
+
0 Alpha Co 200.0
|
|
2419
|
+
1 Blue Inc 50.0
|
|
2420
|
+
2 Jones LLC 150.0
|
|
2421
|
+
3 Orange Inc NaN
|
|
2422
|
+
4 Yellow Inc NaN
|
|
2423
|
+
5 Red Inc 150.0
|
|
2424
|
+
|
|
2425
|
+
# Example 3: Get the data from dataset name.
|
|
2426
|
+
>>> dc = DatasetCatalog(repo=repo, data_domain=data_domain)
|
|
2427
|
+
>>> dc.build_dataset(entity='accounts',
|
|
2428
|
+
... selected_features={'Jan': fp.process_id,
|
|
2429
|
+
... 'Feb': fp.process_id},
|
|
2430
|
+
... view_name='test_get_data',
|
|
2431
|
+
... description='Dataset with Jan and Feb')
|
|
2432
|
+
>>> fs.get_data(dataset_name='test_get_data')
|
|
2433
|
+
accounts Feb Jan
|
|
2434
|
+
0 Alpha Co 210.0 200.0
|
|
2435
|
+
1 Blue Inc 90.0 50.0
|
|
2436
|
+
2 Jones LLC 200.0 150.0
|
|
2437
|
+
3 Orange Inc 210.0 NaN
|
|
2438
|
+
4 Yellow Inc 90.0 NaN
|
|
2439
|
+
5 Red Inc 200.0 150.0
|
|
2440
|
+
|
|
2441
|
+
|
|
2442
|
+
# Example 4: Get the data from Entity and Features, where entity
|
|
2443
|
+
# object and feature objects passed to the entity and
|
|
2444
|
+
# features arguments.
|
|
2445
|
+
>>> # Create features.
|
|
2446
|
+
>>> feature1 = Feature('sales:Mar',
|
|
2447
|
+
... df.Mar,
|
|
2448
|
+
... feature_type=FeatureType.CATEGORICAL)
|
|
2449
|
+
|
|
2450
|
+
>>> feature2 = Feature('sales:Apr',
|
|
2451
|
+
... df.Apr,
|
|
2452
|
+
... feature_type=FeatureType.CONTINUOUS)
|
|
2453
|
+
|
|
2454
|
+
>>> # Create entity.
|
|
2455
|
+
>>> entity = Entity(name='accounts_entity', columns=['accounts'])
|
|
2456
|
+
|
|
2457
|
+
>>> fp1 = FeatureProcess(repo=repo,
|
|
2458
|
+
... data_domain=data_domain,
|
|
2459
|
+
... object=df,
|
|
2460
|
+
... entity=entity,
|
|
2461
|
+
... features=[feature1, feature2])
|
|
2462
|
+
>>> fp1.run()
|
|
2463
|
+
Process '5522c034-684d-11f0-99c5-a30631e77953' started.
|
|
2464
|
+
Process '5522c034-684d-11f0-99c5-a30631e77953' completed.
|
|
2465
|
+
True
|
|
2466
|
+
|
|
2467
|
+
>>> fs.get_data(entity=entity, features={feature1.name: fp1.process_id,
|
|
2468
|
+
... feature2.name: fp1.process_id})
|
|
2469
|
+
accounts sales:Mar sales:Apr
|
|
2470
|
+
0 Alpha Co 215.0 250.0
|
|
2471
|
+
1 Blue Inc 95.0 101.0
|
|
2472
|
+
2 Jones LLC 140.0 180.0
|
|
2473
|
+
3 Orange Inc NaN 250.0
|
|
2474
|
+
4 Yellow Inc NaN NaN
|
|
2475
|
+
5 Red Inc 140.0 NaN
|
|
2476
|
+
|
|
2477
|
+
# Example 5: Get the data for the time passed by the user via the as_of argument.
|
|
2478
|
+
>>> import time
|
|
2479
|
+
>>> from datetime import datetime as dt, date as d
|
|
2480
|
+
|
|
2481
|
+
# Retrieve the record where accounts == 'Blue Inc'.
|
|
2482
|
+
>>> df_test = df[df['accounts'] == 'Blue Inc']
|
|
2483
|
+
>>> df_test
|
|
2484
|
+
Feb Jan Mar Apr datetime
|
|
2485
|
+
accounts
|
|
2486
|
+
Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
|
|
2487
|
+
|
|
2488
|
+
# The example updates the data, so create a new table to avoid modifying the existing table's data.
|
|
2489
|
+
>>> df_test.to_sql('sales_test', if_exists='replace')
|
|
2490
|
+
>>> test_df = DataFrame('sales_test')
|
|
2491
|
+
>>> test_df
|
|
2492
|
+
accounts Feb Jan Mar Apr datetime
|
|
2493
|
+
0 Blue Inc 90.0 50 95 101 17/01/04
|
|
2494
|
+
|
|
2495
|
+
>>> # Create a feature process.
|
|
2496
|
+
>>> fp = FeatureProcess(repo=repo,
|
|
2497
|
+
... data_domain=data_domain,
|
|
2498
|
+
... object=test_df,
|
|
2499
|
+
... entity='accounts',
|
|
2500
|
+
... features=['Jan', 'Feb'])
|
|
2501
|
+
|
|
2502
|
+
>>> # Run the feature process
|
|
2503
|
+
>>> fp.run()
|
|
2504
|
+
Process '6cb49b4b-79d4-11f0-8c5e-b0dcef8381ea' started.
|
|
2505
|
+
Process '6cb49b4b-79d4-11f0-8c5e-b0dcef8381ea' completed.
|
|
2506
|
+
True
|
|
2507
|
+
|
|
2508
|
+
>>> # Running the same process more than once to demonstrate how user can
|
|
2509
|
+
>>> # retrieve specific version of Features using argument 'as_of'.
|
|
2510
|
+
>>> # Wait for 20 seconds. Then update the data. Then run again.
|
|
2511
|
+
>>> time.sleep(20)
|
|
2512
|
+
>>> execute_sql("update sales_test set Jan = Jan * 10, Feb = Feb * 10")
|
|
2513
|
+
TeradataCursor uRowsHandle=269 bClosed=False
|
|
2514
|
+
|
|
2515
|
+
>>> # Run the feature process again.
|
|
2516
|
+
>>> fp.run()
|
|
2517
|
+
Process '6cb49b4b-79d4-11f0-8c5e-b0dcef8381ea' started.
|
|
2518
|
+
Process '6cb49b4b-79d4-11f0-8c5e-b0dcef8381ea' completed.
|
|
2519
|
+
True
|
|
2520
|
+
|
|
2521
|
+
>>> # Then again wait for 20 seconds. Then update the data. Then run again.
|
|
2522
|
+
>>> time.sleep(20)
|
|
2523
|
+
>>> execute_sql("update sales_test set Jan = Jan * 10, Feb = Feb * 10")
|
|
2524
|
+
TeradataCursor uRowsHandle=397 bClosed=False
|
|
2525
|
+
|
|
2526
|
+
>>> # Run the feature process again.
|
|
2527
|
+
>>> fp.run()
|
|
2528
|
+
Process '6cb49b4b-79d4-11f0-8c5e-b0dcef8381ea' started.
|
|
2529
|
+
Process '6cb49b4b-79d4-11f0-8c5e-b0dcef8381ea' completed.
|
|
2530
|
+
True
|
|
2531
|
+
|
|
2532
|
+
# Retrieve specific version of Features at '2025-08-15 12:37:23'
|
|
2533
|
+
>>> as_of_time = dt(2025, 8, 15, 12, 37, 23)
|
|
2534
|
+
|
|
2535
|
+
>>> # time passed to as_of in datetime.datetime format.
|
|
2536
|
+
>>> fs.get_data(process_id=fp.process_id,
|
|
2537
|
+
... as_of=as_of_time)
|
|
2538
|
+
accounts Feb Jan
|
|
2539
|
+
0 Blue Inc 900.0 500
|
|
2540
|
+
|
|
2541
|
+
>>> # time passed to as_of in string format.
|
|
2542
|
+
>>> fs.get_data(process_id=fp.process_id,
|
|
2543
|
+
... as_of=as_of_time.strftime('%Y-%m-%d %H:%M:%S'))
|
|
2544
|
+
accounts Feb Jan
|
|
2545
|
+
0 Blue Inc 900.0 500
|
|
2546
|
+
|
|
2547
|
+
# Example 6: Get the data for the time passed by the user via the as_of argument
|
|
2548
|
+
# by sourcing entity and features.
|
|
2549
|
+
>>> # time passed to as_of in datetime.datetime format.
|
|
2550
|
+
>>> fs.get_data(entity='accounts',
|
|
2551
|
+
... features={'Feb': fp.process_id,
|
|
2552
|
+
... 'Jan': fp.process_id},
|
|
2553
|
+
... as_of=as_of_time)
|
|
2554
|
+
accounts Feb Jan
|
|
2555
|
+
0 Blue Inc 900.0 500
|
|
2556
|
+
|
|
2557
|
+
>>> # time passed to as_of in string format.
|
|
2558
|
+
>>> fs.get_data(entity='accounts',
|
|
2559
|
+
... features={'Feb': fp.process_id,
|
|
2560
|
+
... 'Jan': fp.process_id},
|
|
2561
|
+
... as_of=as_of_time.strftime('%Y-%m-%d %H:%M:%S'))
|
|
2562
|
+
accounts Feb Jan
|
|
2563
|
+
0 Blue Inc 900.0 500
|
|
2564
|
+
|
|
2565
|
+
# Example 7: Get the latest data for the given process_id.
|
|
2566
|
+
>>> fs.get_data(process_id=fp.process_id, include_historic_records=False)
|
|
2567
|
+
accounts Feb Jan
|
|
2568
|
+
0 Blue Inc 9000.0 5000
|
|
2569
|
+
|
|
2570
|
+
# Example 8: Get the historic data for the given process_id.
|
|
2571
|
+
>>> fs.get_data(process_id=fp.process_id, include_historic_records=True)
|
|
2572
|
+
accounts Feb Jan
|
|
2573
|
+
0 Blue Inc 9000.0 5000
|
|
2574
|
+
1 Blue Inc 90.0 50
|
|
2575
|
+
2 Blue Inc 90.0 5000
|
|
2576
|
+
3 Blue Inc 900.0 500
|
|
2577
|
+
4 Blue Inc 900.0 5000
|
|
2578
|
+
5 Blue Inc 900.0 50
|
|
2579
|
+
6 Blue Inc 90.0 500
|
|
2580
|
+
7 Blue Inc 9000.0 50
|
|
2581
|
+
8 Blue Inc 9000.0 500
|
|
2582
|
+
|
|
2583
|
+
# Example 9: Get the latest data for the given feature.
|
|
2584
|
+
>>> fs.get_data(entity='accounts', features={'Feb': fp.process_id}, include_historic_records=False)
|
|
2585
|
+
accounts Feb
|
|
2586
|
+
0 Blue Inc 9000.0
|
|
2587
|
+
|
|
2588
|
+
# Example 10: Get the historic data for the given feature.
|
|
2589
|
+
>>> fs.get_data(entity='accounts', features={'Feb': fp.process_id}, include_historic_records=True)
|
|
2590
|
+
accounts Feb
|
|
2591
|
+
0 Blue Inc 900.0
|
|
2592
|
+
1 Blue Inc 90.0
|
|
2593
|
+
2 Blue Inc 9000.0
|
|
2594
|
+
|
|
2595
|
+
"""
|
|
2596
|
+
self._logger.info(f"Getting data from repository '{self.__repo}', data_domain '{self.__data_domain}', process_id: {process_id}, entity: {entity}, features: {features}, dataset_name: {dataset_name}, as_of: {as_of}, include_historic_records: {include_historic_records}")
|
|
2597
|
+
|
|
2598
|
+
# Validate argument types
|
|
2599
|
+
args = []
|
|
2600
|
+
args.append(["process_id", process_id, True, (str), True])
|
|
2601
|
+
args.append(["entity", entity, True, (Entity, str), True])
|
|
2602
|
+
args.append(["features", features, True, (dict), True])
|
|
2603
|
+
args.append(["dataset_name", dataset_name, True, (str), True])
|
|
2604
|
+
args.append(["as_of", as_of, True, (str, dt), True])
|
|
2605
|
+
args.append(["include_historic_records", include_historic_records, True, (bool)])
|
|
2606
|
+
|
|
2607
|
+
_Validators._validate_function_arguments(args)
|
|
2608
|
+
|
|
2609
|
+
# Validate mutually exclusive arguments.
|
|
2610
|
+
_Validators._validate_mutually_exclusive_argument_groups({"process_id": process_id},
|
|
2611
|
+
{"dataset_name": dataset_name},
|
|
2612
|
+
{"entity": entity, "features": features})
|
|
2613
|
+
|
|
2614
|
+
# Validate whether entity and features are mutually inclusive.
|
|
2615
|
+
_Validators._validate_mutually_inclusive_arguments(entity, "entity",
|
|
2616
|
+
features, "features")
|
|
2617
|
+
|
|
2618
|
+
# Validate at least one argument is passed.
|
|
2619
|
+
_Validators._validate_any_argument_passed({"process_id": process_id,
|
|
2620
|
+
"entity' and 'features": entity,
|
|
2621
|
+
"dataset_name": dataset_name})
|
|
2622
|
+
|
|
2623
|
+
# If the user passes a dataset name, return the DataFrame on the corresponding view directly.
|
|
2624
|
+
if dataset_name:
|
|
2625
|
+
return DataFrame(in_schema(self.__repo, dataset_name))
|
|
2626
|
+
|
|
2627
|
+
if process_id:
|
|
2628
|
+
entity, features = (
|
|
2629
|
+
self.__get_entity_and_features_from_process_id(process_id))
|
|
2630
|
+
|
|
2631
|
+
# Generate the view name.
|
|
2632
|
+
view_name = UtilFuncs._generate_temp_table_name(databasename=self.__repo)
|
|
2633
|
+
|
|
2634
|
+
# When as_of is not None, get all the data instead of only latest.
|
|
2635
|
+
if as_of:
|
|
2636
|
+
include_historic_records = True
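# Note: point-in-time retrieval needs the full history so that the as_of filter
# applied at the end of this method can locate the matching validity window;
# hence historic records are always included when "as_of" is supplied.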
|
|
2637
|
+
|
|
2638
|
+
# Create the DatasetCatalog and build dataset on top of it.
|
|
2639
|
+
dc = DatasetCatalog(repo=self.__repo, data_domain=self.__data_domain)
|
|
2640
|
+
dataset = dc._build_dataset(
|
|
2641
|
+
entity, features,
|
|
2642
|
+
include_historic_records=include_historic_records,
|
|
2643
|
+
include_time_series=True if as_of else False,
|
|
2644
|
+
view_name=view_name,
|
|
2645
|
+
temporary=True)
|
|
2646
|
+
|
|
2647
|
+
if as_of:
|
|
2648
|
+
return self.__filter_dataset_by_as_of(dataset, entity, list(features.keys()), as_of)
|
|
2649
|
+
return dataset
|
|
2650
|
+
|
|
2651
|
+
def __get_entity_and_features_from_process_id(self, process_id):
|
|
2652
|
+
"""
|
|
2653
|
+
DESCRIPTION:
|
|
2654
|
+
Internal function to get the entity id and selected features
|
|
2655
|
+
using process_id.
|
|
2656
|
+
|
|
2657
|
+
PARAMETERS:
|
|
2658
|
+
process_id:
|
|
2659
|
+
Required Argument.
|
|
2660
|
+
Specifies the process id of FeatureProcess.
|
|
2661
|
+
Types: str
|
|
2662
|
+
|
|
2663
|
+
RETURNS:
|
|
2664
|
+
entity_id, selected_features
|
|
2665
|
+
|
|
2666
|
+
RAISES:
|
|
2667
|
+
None
|
|
2668
|
+
|
|
2669
|
+
EXAMPLES:
|
|
2670
|
+
>>> fs.__get_entity_and_features_from_process_id('123-acd')
|
|
2671
|
+
"""
|
|
2672
|
+
feature_ver = self.__get_feature_version()
|
|
2673
|
+
feature_ver = feature_ver[feature_ver["feature_version"] == process_id]
|
|
2674
|
+
|
|
2675
|
+
# Check if a feature with that process id exists or not. If not, raise error.
|
|
2676
|
+
if feature_ver.shape[0] == 0:
|
|
2677
|
+
res = _FSUtils._get_data_domains(self.__repo, process_id, 'feature_version')
|
|
2678
|
+
if res:
|
|
2679
|
+
msg_code = MessageCodes.EFS_OBJECT_IN_OTHER_DOMAIN
|
|
2680
|
+
error_msg = Messages.get_message(msg_code, "Feature", "process id '{}'".format(process_id),
|
|
2681
|
+
self.__data_domain, res)
|
|
2682
|
+
else:
|
|
2683
|
+
msg_code = MessageCodes.EFS_OBJECT_NOT_EXIST
|
|
2684
|
+
error_msg = Messages.get_message(msg_code, "Feature", "process id '{}'".format(process_id),
|
|
2685
|
+
self.__data_domain)
|
|
2686
|
+
raise TeradataMlException(error_msg, msg_code)
|
|
2687
|
+
|
|
2688
|
+
selected_features = {}
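# Build the feature name -> feature version (process id) mapping expected by the
# dataset builder, capturing the entity id shared by the matched feature versions.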
|
|
2689
|
+
for f_ver in feature_ver.itertuples():
|
|
2690
|
+
entity_id = f_ver.entity_id
|
|
2691
|
+
selected_features[f_ver.feature_name] = process_id
|
|
2692
|
+
return entity_id, selected_features
|
|
2693
|
+
|
|
2694
|
+
def __filter_dataset_by_as_of(self, dataset, entity_column, features_column_list, as_of):
|
|
2695
|
+
"""
|
|
2696
|
+
DESCRIPTION:
|
|
2697
|
+
Internal function to filter the dataset using as_of and
|
|
2698
|
+
return only required columns.
|
|
2699
|
+
|
|
2700
|
+
PARAMETERS:
|
|
2701
|
+
dataset:
|
|
2702
|
+
Required Argument.
|
|
2703
|
+
Specifies the teradataml DataFrame.
|
|
2704
|
+
Types: teradataml DataFrame
|
|
2705
|
+
|
|
2706
|
+
entity_column:
|
|
2707
|
+
Required Argument.
|
|
2708
|
+
Specifies the column name of entity.
|
|
2709
|
+
Types: str
|
|
2710
|
+
|
|
2711
|
+
features_column_list:
|
|
2712
|
+
Required Argument.
|
|
2713
|
+
Specifies the list of feature column names.
|
|
2714
|
+
Types: list of str
|
|
2715
|
+
|
|
2716
|
+
as_of:
|
|
2717
|
+
Required Argument.
|
|
2718
|
+
Specifies the time to retrieve the Feature Values instead of
|
|
2719
|
+
retrieving the latest values.
|
|
2720
|
+
Notes:
|
|
2721
|
+
* Applicable only when "process_id" is passed to the function.
|
|
2722
|
+
* Ignored when "dataset_name" is passed.
|
|
2723
|
+
Types: str or datetime.datetime
|
|
2724
|
+
|
|
2725
|
+
RETURNS:
|
|
2726
|
+
teradataml DataFrame
|
|
2727
|
+
|
|
2728
|
+
RAISES:
|
|
2729
|
+
None
|
|
2730
|
+
|
|
2731
|
+
EXAMPLES:
|
|
2732
|
+
>>> load_example_data("dataframe", "sales")
|
|
2733
|
+
>>> df = DataFrame("sales")
|
|
2734
|
+
>>> fs.__filter_dataset_by_as_of(df, "accounts", ["Jan", "Feb"], datetime.datetime(2025, 1, 1))
|
|
2735
|
+
|
|
2736
|
+
"""
|
|
2737
|
+
conditions = [
|
|
2738
|
+
(dataset[f"{f}_start_time"] <= as_of) & (as_of <= dataset[f"{f}_end_time"])
|
|
2739
|
+
for f in features_column_list
|
|
2740
|
+
]
|
|
2741
|
+
combined_condition = reduce(operator.and_, conditions)
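# Illustrative expansion (assuming features ["Jan", "Feb"]): the combined condition is
#   (Jan_start_time <= as_of) & (as_of <= Jan_end_time) &
#   (Feb_start_time <= as_of) & (as_of <= Feb_end_time)
# i.e. "as_of" must fall inside every selected feature's validity window.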
|
|
2742
|
+
required_columns = UtilFuncs._as_list(entity_column) + features_column_list
|
|
2743
|
+
return dataset[combined_condition].select(required_columns)
|
|
2744
|
+
|
|
2745
|
+
def __get_feature_group_names(self, name, type_):
|
|
2746
|
+
"""
|
|
2747
|
+
DESCRIPTION:
|
|
2748
|
+
Internal function to get the associated group names for
|
|
2749
|
+
Feature, DataSource, or Entity.
|
|
2750
|
+
|
|
2751
|
+
PARAMETERS:
|
|
2752
|
+
name:
|
|
2753
|
+
Required Argument.
|
|
2754
|
+
Specifies the name of the Feature or DataSource or Entity.
|
|
2755
|
+
Types: str
|
|
2756
|
+
|
|
2757
|
+
type_:
|
|
2758
|
+
Required Argument.
|
|
2759
|
+
Specifies the type of the objects stored in feature store.
|
|
2760
|
+
Permitted Values:
|
|
2761
|
+
* feature
|
|
2762
|
+
* data_source
|
|
2763
|
+
* entity
|
|
2764
|
+
Types: str
|
|
2765
|
+
|
|
2766
|
+
RETURNS:
|
|
2767
|
+
list
|
|
2768
|
+
|
|
2769
|
+
RAISES:
|
|
2770
|
+
None
|
|
2771
|
+
|
|
2772
|
+
EXAMPLES:
|
|
2773
|
+
>>> self.__get_feature_group_names('admissions', 'data_source')
|
|
2774
|
+
"""
|
|
2775
|
+
if type_ == "feature":
|
|
2776
|
+
df = self.__get_features_df()
|
|
2777
|
+
return [rec.group_name for rec in df[df.name == name].itertuples() if rec.group_name is not None]
|
|
2778
|
+
elif type_ == "data_source":
|
|
2779
|
+
df = self.__get_feature_group_df()
|
|
2780
|
+
return [rec.name for rec in df[df.data_source_name == name].itertuples()]
|
|
2781
|
+
elif type_ == "entity":
|
|
2782
|
+
df = self.__get_feature_group_df()
|
|
2783
|
+
return [rec.name for rec in df[df.entity_name == name].itertuples()]
|
|
2784
|
+
|
|
2785
|
+
def __remove_obj(self, name, type_, action="archive"):
|
|
2786
|
+
"""
|
|
2787
|
+
DESCRIPTION:
|
|
2788
|
+
Internal function to remove a Feature, DataSource, or
|
|
2789
|
+
Entity from the repository.
|
|
2790
|
+
|
|
2791
|
+
PARAMETERS:
|
|
2792
|
+
name:
|
|
2793
|
+
Required Argument.
|
|
2794
|
+
Specifies the name of the Feature or DataSource or Entity.
|
|
2795
|
+
Types: str
|
|
2796
|
+
|
|
2797
|
+
type_:
|
|
2798
|
+
Required Argument.
|
|
2799
|
+
Specifies the type of "name".
|
|
2800
|
+
Types: str
|
|
2801
|
+
Permitted Values:
|
|
2802
|
+
* feature
|
|
2803
|
+
* data_source
|
|
2804
|
+
* entity
|
|
2805
|
+
|
|
2806
|
+
action:
|
|
2807
|
+
Optional Argument.
|
|
2808
|
+
Specifies the action to perform on the object.
|
|
2809
|
+
When set to "delete", the object is removed from the staging tables.
|
|
2810
|
+
When set to "archive", the object is removed from the regular tables.
Permitted Values: "archive", "delete"
|
|
2811
|
+
Default Value: "archive"
|
|
2812
|
+
Types: str
|
|
2813
|
+
|
|
2814
|
+
RETURNS:
|
|
2815
|
+
bool
|
|
2816
|
+
|
|
2817
|
+
RAISES:
|
|
2818
|
+
None
|
|
2819
|
+
|
|
2820
|
+
EXAMPLES:
|
|
2821
|
+
>>> self.__remove_obj('admissions', 'data_source')
|
|
2822
|
+
"""
|
|
2823
|
+
self._logger.debug(f"Removing object '{name}' of type '{type_}' with action '{action}' from repository '{self.__repo}', data_domain '{self.__data_domain}'")
|
|
2824
|
+
|
|
2825
|
+
_vars = {
|
|
2826
|
+
"data_source": {"class": DataSource, "error_msg": "Update these FeatureGroups with other DataSources"},
|
|
2827
|
+
"entity": {"class": Entity, "error_msg": "Update these FeatureGroups with other Entities"},
|
|
2828
|
+
"feature": {"class": Feature, "error_msg": "Remove the Feature from FeatureGroup"},
|
|
2829
|
+
}
|
|
2830
|
+
c_name_ = _vars[type_]["class"].__name__
|
|
2831
|
+
argument_validation_params = []
|
|
2832
|
+
argument_validation_params.append([type_, name, False, (str, _vars[type_]["class"]), True])
|
|
2833
|
+
|
|
2834
|
+
# Validate argument types
|
|
2835
|
+
_Validators._validate_function_arguments(argument_validation_params)
|
|
2836
|
+
# Extract the name if argument is class type.
|
|
2837
|
+
if isinstance(name, _vars[type_]["class"]):
|
|
2838
|
+
self._logger.debug(f"Extracted name '{name.name}' from {type_} object")
|
|
2839
|
+
name = name.name
|
|
2840
|
+
|
|
2841
|
+
# Get the feature info DataFrame.
|
|
2842
|
+
feature_info_df = self.__get_feature_info_df()
|
|
2843
|
+
|
|
2844
|
+
# Before removing it, check if it is associated with any FeatureGroup.
|
|
2845
|
+
# If yes, raise error. Applicable only for Archive.
|
|
2846
|
+
if action == "archive":
|
|
2847
|
+
self._logger.debug(f"Checking if {type_} '{name}' is associated with feature groups before archiving")
|
|
2848
|
+
feature_groups = self.__get_feature_group_names(name, type_)
|
|
2849
|
+
if feature_groups:
|
|
2850
|
+
feature_groups_str = ", ".join(("'{}'".format(fg) for fg in feature_groups))
|
|
2851
|
+
self._logger.debug(f"{c_name_} '{name}' is associated with FeatureGroups: {feature_groups_str}")
|
|
2852
|
+
message = ("{} '{}' is associated with FeatureGroups {}. {} and try deleting again.".format(
|
|
2853
|
+
c_name_, name, feature_groups_str, _vars[type_]["error_msg"]))
|
|
2854
|
+
raise TeradataMlException(Messages.get_message(
|
|
2855
|
+
MessageCodes.FUNC_EXECUTION_FAILED, '{}_{}'.format(action, type_), message),
|
|
2856
|
+
MessageCodes.FUNC_EXECUTION_FAILED)
|
|
2857
|
+
# Check if the feature or entity exists in Feature metadata table.
|
|
2858
|
+
# If yes, then raise error. Applicable only for Archive.
|
|
2859
|
+
info_checks = {
|
|
2860
|
+
'feature': ('name', MessageCodes.EFS_FEATURE_IN_CATALOG),
|
|
2861
|
+
'entity': ('entity_name', MessageCodes.EFS_ENTITY_IN_CATALOG)
|
|
2862
|
+
}
|
|
2863
|
+
if type_ in info_checks:
|
|
2864
|
+
col, error_code = info_checks[type_]
|
|
2865
|
+
validate_df = feature_info_df[feature_info_df[col].isin([name])]
|
|
2866
|
+
if validate_df.shape[0] > 0:
|
|
2867
|
+
self._logger.debug(f"{c_name_} '{name}' exists in feature catalog and cannot be archived")
|
|
2868
|
+
if type_ == "entity":
|
|
2869
|
+
related_features = [feature.name for feature in validate_df.itertuples()]
|
|
2870
|
+
features = ", ".join(("'{}'".format(f) for f in related_features))
|
|
2871
|
+
err_msg = Messages.get_message(error_code,
|
|
2872
|
+
name,
|
|
2873
|
+
features)
|
|
2874
|
+
else:
|
|
2875
|
+
err_msg = Messages.get_message(error_code,
|
|
2876
|
+
name)
|
|
2877
|
+
raise TeradataMlException(err_msg, error_code)
|
|
2878
|
+
|
|
2879
|
+
stg_table = _FeatureStoreDFContainer.get_df("{}_staging".format(type_), self.__repo, self.__data_domain)
|
|
2880
|
+
stg_table = stg_table[stg_table.name == name]
|
|
2881
|
+
if stg_table.shape[0] > 0:
|
|
2882
|
+
self._logger.info(f"{c_name_} '{name}' is already archived")
|
|
2883
|
+
print("{} '{}' is already archived.".format(c_name_, name))
|
|
2884
|
+
return False
|
|
2885
|
+
|
|
2886
|
+
# Validation for delete action - ensure object is already archived
|
|
2887
|
+
if action == "delete":
|
|
2888
|
+
self._logger.debug(f"Validating {type_} '{name}' is archived before deletion")
|
|
2889
|
+
# Check if object exists in main table (not archived)
|
|
2890
|
+
main_table_name = self.__table_names[type_]
|
|
2891
|
+
main_df = _FeatureStoreDFContainer.get_df(type_, self.__repo, self.__data_domain)
|
|
2892
|
+
existing_records = main_df[(main_df["name"] == name)]
|
|
2893
|
+
|
|
2894
|
+
if existing_records.shape[0] > 0:
|
|
2895
|
+
self._logger.debug(f"{c_name_} '{name}' must be archived before deletion")
|
|
2896
|
+
error_code = MessageCodes.EFS_DELETE_BEFORE_ARCHIVE
|
|
2897
|
+
error_msg = Messages.get_message(error_code,
|
|
2898
|
+
c_name_,
|
|
2899
|
+
name,
|
|
2900
|
+
type_)
|
|
2901
|
+
raise TeradataMlException(error_msg, error_code)
|
|
2902
|
+
|
|
2903
|
+
if type_ == "entity":
|
|
2904
|
+
self._logger.debug(f"Removing entity '{name}' using specialized entity removal method")
|
|
2905
|
+
res = self._remove_entity(name, action)
|
|
2906
|
+
else:
|
|
2907
|
+
table_name = self.__table_names[type_]
|
|
2908
|
+
if action == "delete":
|
|
2909
|
+
table_name = self.__table_names["{}_staging".format(type_)]
|
|
2910
|
+
|
|
2911
|
+
self._logger.debug(f"Removing {type_} '{name}' from table '{table_name}'")
|
|
2912
|
+
res = _delete_data(table_name=table_name,
|
|
2913
|
+
schema_name=self.__repo,
|
|
2914
|
+
delete_conditions=(Col("name") == name) &
|
|
2915
|
+
(Col("data_domain") == self.__data_domain)
|
|
2916
|
+
)
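# _delete_data is treated as returning 1 when a matching row was removed; any other
# value is reported below as the object not existing for this action.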
|
|
2917
|
+
|
|
2918
|
+
if res == 1:
|
|
2919
|
+
self._logger.info(f"{c_name_} '{name}' successfully {action}d")
|
|
2920
|
+
print("{} '{}' is {}d.".format(c_name_, name, action))
|
|
2921
|
+
return True
|
|
2922
|
+
else:
|
|
2923
|
+
self._logger.debug(f"{c_name_} '{name}' does not exist to {action}")
|
|
2924
|
+
print("{} '{}' does not exist to {}.".format(c_name_, name, action))
|
|
2925
|
+
return False
|
|
2926
|
+
|
|
2927
|
+
@db_transaction
|
|
2928
|
+
def _remove_entity(self, name, action):
|
|
2929
|
+
"""
|
|
2930
|
+
DESCRIPTION:
|
|
2931
|
+
Internal function to remove an Entity from the repository.
|
|
2932
|
+
|
|
2933
|
+
PARAMETERS:
|
|
2934
|
+
name:
|
|
2935
|
+
Required Argument.
|
|
2936
|
+
Specifies the name of the Entity.
|
|
2937
|
+
Types: str
|
|
2938
|
+
|
|
2939
|
+
action:
|
|
2940
|
+
Required Argument.
|
|
2941
|
+
Specifies the action to perform on the Entity.
|
|
2942
|
+
When set to "delete", Entity is removed from staging tables.
|
|
2943
|
+
Otherwise, Entity is removed from regular tables.
|
|
2944
|
+
Types: str
|
|
2945
|
+
|
|
2946
|
+
RETURNS:
|
|
2947
|
+
bool
|
|
2948
|
+
|
|
2949
|
+
RAISES:
|
|
2950
|
+
None
|
|
2951
|
+
|
|
2952
|
+
EXAMPLES:
|
|
2953
|
+
>>> self._remove_entity('admissions', 'delete')
|
|
2954
|
+
"""
|
|
2955
|
+
self._logger.debug(f"Removing entity '{name}' from repository '{self.__repo}', data_domain '{self.__data_domain}', action: {action}")
|
|
2956
|
+
|
|
2957
|
+
ent_table = self.__table_names["entity"]
|
|
2958
|
+
ent_table_xref = self.__table_names["entity_xref"]
|
|
2959
|
+
if action == "delete":
|
|
2960
|
+
ent_table = self.__table_names["entity_staging"]
|
|
2961
|
+
ent_table_xref = self.__table_names["entity_staging_xref"]
|
|
2962
|
+
|
|
2963
|
+
# remove it from xref table first.
|
|
2964
|
+
self._logger.debug(f"Removing entity '{name}' from {ent_table_xref} table")
|
|
2965
|
+
_delete_data(table_name=ent_table_xref,
|
|
2966
|
+
schema_name=self.__repo,
|
|
2967
|
+
delete_conditions=(Col("entity_name") == name) &
|
|
2968
|
+
(Col("data_domain") == self.__data_domain)
|
|
2969
|
+
)
|
|
2970
|
+
|
|
2971
|
+
# remove from entity table.
|
|
2972
|
+
self._logger.debug(f"Removing entity '{name}' from {ent_table} table")
|
|
2973
|
+
res = _delete_data(table_name=ent_table,
|
|
2974
|
+
schema_name=self.__repo,
|
|
2975
|
+
delete_conditions=(Col("name") == name) &
|
|
2976
|
+
(Col("data_domain") == self.__data_domain)
|
|
2977
|
+
)
|
|
2978
|
+
|
|
2979
|
+
return res
|
|
2980
|
+
|
|
2981
|
+
def archive_data_source(self, data_source):
|
|
2982
|
+
"""
|
|
2983
|
+
DESCRIPTION:
|
|
2984
|
+
Archives DataSource from repository. Note that archived DataSource
|
|
2985
|
+
is not available for any further processing. Archived DataSource can be
|
|
2986
|
+
viewed using "list_data_sources(archived=True)" method.
|
|
2987
|
+
|
|
2988
|
+
PARAMETERS:
|
|
2989
|
+
data_source:
|
|
2990
|
+
Required Argument.
|
|
2991
|
+
Specifies either the name of DataSource or Object of DataSource
|
|
2992
|
+
to archive from repository.
|
|
2993
|
+
Types: str OR DataSource
|
|
2994
|
+
|
|
2995
|
+
RETURNS:
|
|
2996
|
+
bool
|
|
2997
|
+
|
|
2998
|
+
RAISES:
|
|
2999
|
+
TeradataMLException, TypeError, ValueError
|
|
3000
|
+
|
|
3001
|
+
EXAMPLES:
|
|
3002
|
+
>>> from teradataml import DataFrame, DataSource, FeatureStore
|
|
3003
|
+
# Create FeatureStore for repo 'vfs_v1'.
|
|
3004
|
+
>>> fs = FeatureStore("vfs_v1")
|
|
3005
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
3006
|
+
# Setup FeatureStore for this repository.
|
|
3007
|
+
>>> fs.setup()
|
|
3008
|
+
True
|
|
3009
|
+
|
|
3010
|
+
# Example 1: Archive the DataSource 'sales_data' in the repo 'vfs_v1' using DataSource object.
|
|
3011
|
+
# Create a DataSource using SELECT statement.
|
|
3012
|
+
>>> ds = DataSource(name="sales_data", source="select * from sales")
|
|
3013
|
+
# Apply DataSource to FeatureStore.
|
|
3014
|
+
>>> fs.apply(ds)
|
|
3015
|
+
True
|
|
3016
|
+
|
|
3017
|
+
# List the available DataSources.
|
|
3018
|
+
>>> fs.list_data_sources()
|
|
3019
|
+
description timestamp_column source creation_time modified_time
|
|
3020
|
+
name data_domain
|
|
3021
|
+
sales_data ALICE None None select * from sales 2025-07-28 04:24:48.117827 None
|
|
3022
|
+
|
|
3023
|
+
# Archive DataSource with name "sales_data".
|
|
3024
|
+
>>> fs.archive_data_source("sales_data")
|
|
3025
|
+
DataSource 'sales_data' is archived.
|
|
3026
|
+
True
|
|
3027
|
+
|
|
3028
|
+
# List the available DataSources after archive.
|
|
3029
|
+
>>> fs.list_data_sources(archived=True)
|
|
3030
|
+
name data_domain description timestamp_column source creation_time modified_time archived_time
|
|
3031
|
+
0 sales_data ALICE None None select * from sales 2025-07-28 04:24:48.117827 None 2025-07-28 04:25:55.430000
|
|
3032
|
+
|
|
3033
|
+
# Example 2: Archive the DataSource 'sales_data_df' in the repo 'vfs_v1' using DataSource name.
|
|
3034
|
+
# Create a DataSource using teradataml DataFrame.
|
|
3035
|
+
>>> from teradataml import DataFrame
|
|
3036
|
+
>>> load_example_data('dataframe', ['sales'])
|
|
3037
|
+
>>> df = DataFrame("sales")
|
|
3038
|
+
>>> ds2 = DataSource(name="sales_data_df", source=df)
|
|
3039
|
+
|
|
3040
|
+
# Apply DataSource to FeatureStore.
|
|
3041
|
+
>>> fs.apply(ds2)
|
|
3042
|
+
True
|
|
3043
|
+
|
|
3044
|
+
# Archive DataSource with name "sales_data_df".
|
|
3045
|
+
>>> fs.archive_data_source("sales_data_df")
|
|
3046
|
+
DataSource 'sales_data_df' is archived.
|
|
3047
|
+
True
|
|
3048
|
+
|
|
3049
|
+
# List the available DataSources after archive.
|
|
3050
|
+
>>> fs.list_data_sources(archived=True)
|
|
3051
|
+
name data_domain description timestamp_column source creation_time modified_time archived_time
|
|
3052
|
+
0 sales_data ALICE None None select * from sales 2025-07-28 04:24:48.117827 None 2025-07-28 04:25:55.430000
|
|
3053
|
+
1 sales_data_df ALICE None None select * from sales 2025-07-28 04:26:10.123456 None 2025-07-28 04:26:45.456789
|
|
3054
|
+
|
|
3055
|
+
|
|
3056
|
+
"""
|
|
3057
|
+
self._logger.info(f"Archiving data source '{data_source}' from repository '{self.__repo}', data_domain '{self.__data_domain}'")
|
|
3058
|
+
return self.__remove_obj(name=data_source, type_="data_source")
|
|
3059
|
+
|
|
3060
|
+
def delete_data_source(self, data_source):
|
|
3061
|
+
"""
|
|
3062
|
+
DESCRIPTION:
|
|
3063
|
+
Removes the archived DataSource from repository.
|
|
3064
|
+
|
|
3065
|
+
PARAMETERS:
|
|
3066
|
+
data_source:
|
|
3067
|
+
Required Argument.
|
|
3068
|
+
Specifies either the name of DataSource or Object of DataSource
|
|
3069
|
+
to remove from repository.
|
|
3070
|
+
Types: str OR DataSource
|
|
3071
|
+
|
|
3072
|
+
RETURNS:
|
|
3073
|
+
bool.
|
|
3074
|
+
|
|
3075
|
+
RAISES:
|
|
3076
|
+
TeradataMLException, TypeError, ValueError
|
|
3077
|
+
|
|
3078
|
+
EXAMPLES:
|
|
3079
|
+
>>> from teradataml import DataFrame, DataSource, FeatureStore, load_example_data
|
|
3080
|
+
# Create teradataml DataFrame.
|
|
3081
|
+
>>> load_example_data('dataframe', ['sales'])
|
|
3082
|
+
>>> df = DataFrame("sales")
|
|
3083
|
+
|
|
3084
|
+
# Create FeatureStore for repo 'vfs_v1'.
|
|
3085
|
+
>>> fs = FeatureStore("vfs_v1")
|
|
3086
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
3087
|
+
# Setup FeatureStore for this repository.
|
|
3088
|
+
>>> fs.setup()
|
|
3089
|
+
True
|
|
3090
|
+
|
|
3091
|
+
# Create DataSource with source as teradataml DataFrame.
|
|
3092
|
+
>>> ds = DataSource(name="sales_data", source=df)
|
|
3093
|
+
# Apply the DataSource to FeatureStore.
|
|
3094
|
+
>>> fs.apply(ds)
|
|
3095
|
+
True
|
|
3096
|
+
|
|
3097
|
+
# Let's first archive the DataSource.
|
|
3098
|
+
>>> fs.archive_data_source("sales_data")
|
|
3099
|
+
DataSource 'sales_data' is archived.
|
|
3100
|
+
True
|
|
3101
|
+
|
|
3102
|
+
# Delete DataSource with name "sales_data".
|
|
3103
|
+
>>> fs.delete_data_source("sales_data")
|
|
3104
|
+
DataSource 'sales_data' is deleted.
|
|
3105
|
+
True
|
|
3106
|
+
|
|
3107
|
+
# List the available DataSources after delete.
|
|
3108
|
+
>>> fs.list_data_sources()
|
|
3109
|
+
Empty DataFrame
|
|
3110
|
+
Columns: [description, timestamp_column, source, creation_time, modified_time]
|
|
3111
|
+
Index: []
|
|
3112
|
+
"""
|
|
3113
|
+
self._logger.info(f"Deleting data source '{data_source}' from repository '{self.__repo}', data_domain '{self.__data_domain}'")
|
|
3114
|
+
return self.__remove_obj(name=data_source, type_="data_source", action="delete")
|
|
3115
|
+
|
|
3116
|
+
def archive_feature(self, feature):
|
|
3117
|
+
"""
|
|
3118
|
+
DESCRIPTION:
|
|
3119
|
+
Archives Feature from repository. Note that archived Feature
|
|
3120
|
+
is not available for any further processing. Archived Feature can be
|
|
3121
|
+
viewed using "list_features(archived=True)" method.
|
|
3122
|
+
|
|
3123
|
+
PARAMETERS:
|
|
3124
|
+
feature:
|
|
3125
|
+
Required Argument.
|
|
3126
|
+
Specifies either the name of Feature or Object of Feature
|
|
3127
|
+
to archive from repository.
|
|
3128
|
+
Types: str OR Feature
|
|
3129
|
+
|
|
3130
|
+
RETURNS:
|
|
3131
|
+
bool
|
|
3132
|
+
|
|
3133
|
+
RAISES:
|
|
3134
|
+
TeradataMLException, TypeError, ValueError
|
|
3135
|
+
|
|
3136
|
+
EXAMPLES:
|
|
3137
|
+
>>> from teradataml import DataFrame, Feature, FeatureStore
|
|
3138
|
+
# Create teradataml DataFrame.
|
|
3139
|
+
>>> load_example_data('dataframe', ['sales'])
|
|
3140
|
+
>>> df = DataFrame("sales")
|
|
3141
|
+
|
|
3142
|
+
# Create FeatureStore for repo 'vfs_v1'.
|
|
3143
|
+
>>> fs = FeatureStore("vfs_v1")
|
|
3144
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
3145
|
+
# Setup FeatureStore for this repository.
|
|
3146
|
+
>>> fs.setup()
|
|
3147
|
+
True
|
|
3148
|
+
|
|
3149
|
+
# Example 1: Archive the Feature 'sales_data_Feb' in the repo 'vfs_v1' using Feature object.
|
|
3150
|
+
# Create Feature for Column 'Feb'.
|
|
3151
|
+
>>> feature = Feature(name="sales_data_Feb", column=df.Feb)
|
|
3152
|
+
# Apply the Feature to FeatureStore.
|
|
3153
|
+
>>> fs.apply(feature)
|
|
3154
|
+
True
|
|
3155
|
+
|
|
3156
|
+
# List the available Features.
|
|
3157
|
+
>>> fs.list_features()
|
|
3158
|
+
id column_name description tags data_type feature_type status creation_time modified_time group_name
|
|
3159
|
+
name data_domain
|
|
3160
|
+
sales_data_Feb ALICE 1 Feb None None FLOAT CONTINUOUS ACTIVE 2025-07-28 04:41:01.641026 None None
|
|
3161
|
+
|
|
3162
|
+
# Archive Feature with name "sales_data_Feb".
|
|
3163
|
+
>>> fs.archive_feature(feature=feature)
|
|
3164
|
+
Feature 'sales_data_Feb' is archived.
|
|
3165
|
+
True
|
|
3166
|
+
|
|
3167
|
+
# List the available archived Features.
|
|
3168
|
+
>>> fs.list_features(archived=True)
|
|
3169
|
+
id name data_domain column_name description tags data_type feature_type status creation_time modified_time archived_time group_name
|
|
3170
|
+
0 1 sales_data_Feb ALICE Feb None None FLOAT CONTINUOUS ACTIVE 2025-07-28 04:41:01.641026 None 2025-07-28 04:41:35.600000 None
|
|
3171
|
+
|
|
3172
|
+
# Example 2: Archive the Feature 'sales_data_Jan' in the repo 'vfs_v1' using feature name.
|
|
3173
|
+
# Create Feature for Column 'Jan'.
|
|
3174
|
+
>>> feature2 = Feature(name="sales_data_Jan", column=df.Jan)
|
|
3175
|
+
# Apply the Feature to FeatureStore.
|
|
3176
|
+
>>> fs.apply(feature2)
|
|
3177
|
+
True
|
|
3178
|
+
|
|
3179
|
+
# Archive Feature with name "sales_data_Jan".
|
|
3180
|
+
>>> fs.archive_feature(feature="sales_data_Jan")
|
|
3181
|
+
Feature 'sales_data_Jan' is archived.
|
|
3182
|
+
True
|
|
3183
|
+
|
|
3184
|
+
# List the available archived Features.
|
|
3185
|
+
>>> fs.list_features(archived=True)
|
|
3186
|
+
id name data_domain column_name description tags data_type feature_type status creation_time modified_time archived_time group_name
|
|
3187
|
+
0 1 sales_data_Feb ALICE Feb None None FLOAT CONTINUOUS ACTIVE 2025-07-28 04:41:01.641026 None 2025-07-28 04:41:35.600000 None
|
|
3188
|
+
1 2 sales_data_Jan ALICE Jan None None FLOAT CONTINUOUS ACTIVE 2025-07-28 04:42:01.641026 None 2025-07-28 04:43:35.600000 None
|
|
3189
|
+
|
|
3190
|
+
"""
|
|
3191
|
+
self._logger.info(f"Archiving feature from repository '{self.__repo}', data_domain '{self.__data_domain}', feature: {feature}")
|
|
3192
|
+
result = self.__remove_obj(name=feature, type_="feature")
|
|
3193
|
+
self._logger.debug(f"Feature archive operation completed with result: {result}")
|
|
3194
|
+
return result
|
|
3195
|
+
|
|
3196
|
+
def delete(self, force=False):
|
|
3197
|
+
"""
|
|
3198
|
+
DESCRIPTION:
|
|
3199
|
+
Removes the FeatureStore and its components from repository.
|
|
3200
|
+
Notes:
|
|
3201
|
+
* The function removes all the associated database objects along with data.
|
|
3202
|
+
Be cautious while using this function.
|
|
3203
|
+
* The function also tries to remove the underlying Database once
|
|
3204
|
+
all the Feature Store objects are removed.
|
|
3205
|
+
* The user must have permission on the database used by this Feature Store
|
|
3206
|
+
* to drop triggers.
|
|
3207
|
+
* to drop the tables.
|
|
3208
|
+
* to drop the Database.
|
|
3209
|
+
* If the user lacks any of the mentioned permissions, Teradata recommends
|
|
3210
|
+
not using this function.
|
|
3211
|
+
|
|
3212
|
+
PARAMETERS:
|
|
3213
|
+
force:
|
|
3214
|
+
Optional Argument.
|
|
3215
|
+
Specifies whether to forcefully delete feature store or not.
|
|
3216
|
+
When set to True, delete() method proceeds to drop objects
|
|
3217
|
+
even if previous step is errored. Otherwise, delete() method
|
|
3218
|
+
raises the exception at the first error and does not proceed to
|
|
3219
|
+
remove other objects.
|
|
3220
|
+
Default Value: False
|
|
3221
|
+
Types: bool
|
|
3222
|
+
|
|
3223
|
+
RETURNS:
|
|
3224
|
+
bool.
|
|
3225
|
+
|
|
3226
|
+
RAISES:
|
|
3227
|
+
None
|
|
3228
|
+
|
|
3229
|
+
EXAMPLES:
|
|
3230
|
+
# Setup FeatureStore for repo 'vfs_v1'.
|
|
3231
|
+
>>> from teradataml import FeatureStore
|
|
3232
|
+
>>> fs = FeatureStore("vfs_v1")
|
|
3233
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
3234
|
+
|
|
3235
|
+
# Setup FeatureStore.
|
|
3236
|
+
>>> fs.setup()
|
|
3237
|
+
True
|
|
3238
|
+
|
|
3239
|
+
# Delete the FeatureStore and all its components.
|
|
3240
|
+
>>> fs.delete()
|
|
3241
|
+
The function removes Feature Store and drops the corresponding repo also. Are you sure you want to proceed? (Y/N): Y
|
|
3242
|
+
True
|
|
3243
|
+
|
|
3244
|
+
# Forcefully delete the FeatureStore and all its components.
|
|
3245
|
+
>>> fs.delete(force=True)
|
|
3246
|
+
The function removes Feature Store and drops the corresponding repo also. Are you sure you want to proceed? (Y/N): Y
|
|
3247
|
+
True
|
|
3248
|
+
"""
|
|
3249
|
+
self._logger.info(f"Deleting FeatureStore and all components from repository '{self.__repo}', force: {force}")
|
|
3250
|
+
|
|
3251
|
+
_args = []
|
|
3252
|
+
_args.append(["force", force, True, (bool)])
|
|
3253
|
+
# Validate argument types
|
|
3254
|
+
_Validators._validate_function_arguments(_args)
|
|
3255
|
+
|
|
3256
|
+
confirmation = input("The function removes Feature Store and drops the "
|
|
3257
|
+
"corresponding repo also. Are you sure you want to proceed? (Y/N): ")
|
|
3258
|
+
|
|
3259
|
+
if confirmation in ["Y", "y"]:
|
|
3260
|
+
return self.__drop_feature_store_objects(force=force)
|
|
3261
|
+
|
|
3262
|
+
self._logger.info(f"FeatureStore deletion cancelled by user")
|
|
3263
|
+
return False
|
|
3264
|
+
|
|
3265
|
+
def __drop_feature_store_objects(self, force=False):
|
|
3266
|
+
"""
|
|
3267
|
+
DESCRIPTION:
|
|
3268
|
+
Removes the FeatureStore and its components from the repository.
|
|
3269
|
+
|
|
3270
|
+
PARAMETERS:
|
|
3271
|
+
|
|
3276
|
+
force:
|
|
3277
|
+
Optional Argument.
|
|
3278
|
+
Specifies whether to forcefully delete feature store or not.
|
|
3279
|
+
When set to True, delete() method proceeds to drop objects
|
|
3280
|
+
even if previous step is errored. Otherwise, delete() method
|
|
3281
|
+
raises the exception at the first error and does not proceed to
|
|
3282
|
+
remove other objects.
|
|
3283
|
+
Default Value: False.
|
|
3284
|
+
Types: bool
|
|
3285
|
+
|
|
3286
|
+
RETURNS:
|
|
3287
|
+
bool
|
|
3288
|
+
"""
|
|
3289
|
+
self._logger.debug(f"Starting to drop FeatureStore objects from repository '{self.__repo}', force: {force}")
|
|
3290
|
+
|
|
3291
|
+
# Drop all the tables and staging tables.
|
|
3292
|
+
self._logger.debug(f"Preparing to drop {len(self.__table_names)} table types from repository")
|
|
3293
|
+
tables_ = [
|
|
3294
|
+
self.__table_names["group_features"],
|
|
3295
|
+
self.__table_names["feature_group"],
|
|
3296
|
+
self.__table_names['feature'],
|
|
3297
|
+
self.__table_names['entity_xref'],
|
|
3298
|
+
self.__table_names['entity'],
|
|
3299
|
+
self.__table_names['data_source'],
|
|
3300
|
+
self.__table_names['feature_process'],
|
|
3301
|
+
self.__table_names['feature_runs'],
|
|
3302
|
+
self.__table_names['feature_metadata'],
|
|
3303
|
+
self.__table_names['dataset_catalog'],
|
|
3304
|
+
self.__table_names['dataset_features'],
|
|
3305
|
+
self.__table_names['data_domain'],
|
|
3306
|
+
self.__table_names['version']
|
|
3307
|
+
]
|
|
3308
|
+
|
|
3309
|
+
tables_stg_ = [
|
|
3310
|
+
self.__table_names['feature_staging'],
|
|
3311
|
+
self.__table_names["entity_staging"],
|
|
3312
|
+
self.__table_names["entity_staging_xref"],
|
|
3313
|
+
self.__table_names["data_source_staging"],
|
|
3314
|
+
self.__table_names["feature_group_staging"],
|
|
3315
|
+
self.__table_names["group_features_staging"]
|
|
3316
|
+
]
|
|
3317
|
+
|
|
3318
|
+
# Drop all the triggers first so that the tables can be dropped.
|
|
3319
|
+
self._logger.debug(f"Dropping {len(EFS_TRIGGERS)} triggers from repository '{self.__repo}'")
|
|
3320
|
+
ignr_errors = 'all' if force else None
|
|
3321
|
+
for trigger in EFS_TRIGGERS.values():
|
|
3322
|
+
self._logger.debug(f"Dropping trigger: {self.__repo}.{trigger}")
|
|
3323
|
+
execute_sql("drop trigger {}.{}".format(self.__repo, trigger),
|
|
3324
|
+
ignore_errors=ignr_errors)
|
|
3325
|
+
|
|
3326
|
+
# Drop the views first.
|
|
3327
|
+
self._logger.debug(f"Dropping views from repository '{self.__repo}'")
|
|
3328
|
+
views_ = [EFS_DB_COMPONENTS['feature_version']]
|
|
3329
|
+
for view in views_:
|
|
3330
|
+
self._logger.debug(f"Dropping view: {self.__repo}.{view}")
|
|
3331
|
+
db_drop_view(view, schema_name=self.__repo, suppress_error=force)
|
|
3332
|
+
|
|
3333
|
+
# Drop datasets.
|
|
3334
|
+
self._logger.debug(f"Dropping datasets from repository '{self.__repo}'")
|
|
3335
|
+
# Used EFS_DB_COMPONENTS['dataset_catalog'] because it contains all the datasets.
|
|
3336
|
+
# The get_df methods are filtered by data_domain, hence they don't show all datasets.
|
|
3337
|
+
for dataset in DataFrame(in_schema(self.__repo, EFS_DB_COMPONENTS['dataset_catalog'])).itertuples():
|
|
3338
|
+
self._logger.debug(f"Dropping dataset view: {dataset.name}")
|
|
3339
|
+
db_drop_view(dataset.name, schema_name=self.__repo, suppress_error=force)
|
|
3340
|
+
|
|
3341
|
+
# Drop all the Feature tables.
|
|
3342
|
+
self._logger.debug(f"Dropping feature tables from repository '{self.__repo}'")
|
|
3343
|
+
dropped_tab = set()
|
|
3344
|
+
# Used EFS_DB_COMPONENTS['feature_metadata'] because it contains all the features.
|
|
3345
|
+
# The get_df methods are filtered by data_domain, hence they don't show all features.
|
|
3346
|
+
for rec in DataFrame(in_schema(self.__repo, EFS_DB_COMPONENTS['feature_metadata'])).itertuples():
|
|
3347
|
+
# Avoid dropping the same table again.
|
|
3348
|
+
dropped_tab.add(rec.table_name)
|
|
3349
|
+
|
|
3350
|
+
for table in dropped_tab:
|
|
3351
|
+
self._logger.debug(f"Dropping feature table: {table}")
|
|
3352
|
+
db_drop_table(table, schema_name=self.__repo, suppress_error=force)
|
|
3353
|
+
|
|
3354
|
+
self._logger.debug(f"Dropping {len(tables_ + tables_stg_)} main and staging tables from repository '{self.__repo}'")
|
|
3355
|
+
for table in (tables_ + tables_stg_):
|
|
3356
|
+
self._logger.debug(f"Dropping table: {table}")
|
|
3357
|
+
db_drop_table(table, schema_name=self.__repo, suppress_error=force)
|
|
3358
|
+
|
|
3359
|
+
self._logger.debug(f"Dropping repository database: {self.__repo}")
|
|
3360
|
+
execute_sql(f"DROP DATABASE {self.__repo}")
|
|
3361
|
+
|
|
3362
|
+
self._logger.debug(f"Successfully completed dropping all FeatureStore objects from repository '{self.__repo}'")
|
|
3363
|
+
return True
|
|
3364
|
+
|
|
3365
|
+
def delete_feature(self, feature):
|
|
3366
|
+
"""
|
|
3367
|
+
DESCRIPTION:
|
|
3368
|
+
Removes the archived Feature from repository.
|
|
3369
|
+
|
|
3370
|
+
PARAMETERS:
|
|
3371
|
+
feature:
|
|
3372
|
+
Required Argument.
|
|
3373
|
+
Specifies either the name of Feature or Object of Feature
|
|
3374
|
+
to remove from repository.
|
|
3375
|
+
Types: str OR Feature
|
|
3376
|
+
|
|
3377
|
+
RETURNS:
|
|
3378
|
+
bool.
|
|
3379
|
+
|
|
3380
|
+
RAISES:
|
|
3381
|
+
TeradataMLException, TypeError, ValueError
|
|
3382
|
+
|
|
3383
|
+
EXAMPLES:
|
|
3384
|
+
>>> from teradataml import DataFrame, Feature, FeatureStore
|
|
3385
|
+
# Create teradataml DataFrame.
|
|
3386
|
+
>>> load_example_data('dataframe', ['sales'])
|
|
3387
|
+
>>> df = DataFrame("sales")
|
|
3388
|
+
|
|
3389
|
+
# Create FeatureStore for repo 'vfs_v1'.
|
|
3390
|
+
>>> fs = FeatureStore("vfs_v1")
|
|
3391
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
3392
|
+
# Setup FeatureStore for this repository.
|
|
3393
|
+
>>> fs.setup()
|
|
3394
|
+
True
|
|
3395
|
+
|
|
3396
|
+
# Example 1: Delete the Feature 'sales_data_Feb' in the repo 'vfs_v1' using Feature object.
|
|
3397
|
+
# Create Feature for Column 'Feb'.
|
|
3398
|
+
>>> feature = Feature(name="sales_data_Feb", column=df.Feb)
|
|
3399
|
+
# Add the feature created above in the feature store.
|
|
3400
|
+
>>> fs.apply(feature)
|
|
3401
|
+
True
|
|
3402
|
+
|
|
3403
|
+
# List the available Features.
|
|
3404
|
+
>>> fs.list_features()
|
|
3405
|
+
id column_name description tags data_type feature_type status creation_time modified_time group_name
|
|
3406
|
+
name data_domain
|
|
3407
|
+
sales_data_Feb ALICE 1 Feb None None FLOAT CONTINUOUS ACTIVE 2025-07-28 04:49:55.827391 None None
|
|
3408
|
+
|
|
3409
|
+
# Let's first archive the Feature.
|
|
3410
|
+
>>> fs.archive_feature(feature=feature)
|
|
3411
|
+
Feature 'sales_data_Feb' is archived.
|
|
3412
|
+
True
|
|
3413
|
+
|
|
3414
|
+
# Delete Feature with name "sales_data_Feb".
|
|
3415
|
+
>>> fs.delete_feature(feature=feature)
|
|
3416
|
+
Feature 'sales_data_Feb' is deleted.
|
|
3417
|
+
True
|
|
3418
|
+
|
|
3419
|
+
# List the available Features after delete.
|
|
3420
|
+
>>> fs.list_features()
|
|
3421
|
+
Empty DataFrame
|
|
3422
|
+
Columns: [id, column_name, description, tags, data_type, feature_type, status, creation_time, modified_time, group_name]
|
|
3423
|
+
Index: []
|
|
3424
|
+
|
|
3425
|
+
# Example 2: Delete the Feature 'sales_data_Jan' in the repo 'vfs_v1' using feature name.
|
|
3426
|
+
# Create Feature for Column 'Jan'.
|
|
3427
|
+
>>> feature2 = Feature(name="sales_data_Jan", column=df.Jan)
|
|
3428
|
+
# Add the feature created above in the feature store.
|
|
3429
|
+
>>> fs.apply(feature2)
|
|
3430
|
+
True
|
|
3431
|
+
|
|
3432
|
+
# List the available Features.
|
|
3433
|
+
>>> fs.list_features()
|
|
3434
|
+
id column_name description tags data_type feature_type status creation_time modified_time group_name
|
|
3435
|
+
name data_domain
|
|
3436
|
+
sales_data_Jan ALICE 2 Jan None None FLOAT CONTINUOUS ACTIVE 2025-07-28 04:50:55.827391 None None
|
|
3437
|
+
|
|
3438
|
+
# Let's first archive the Feature using feature name.
|
|
3439
|
+
>>> fs.archive_feature(feature="sales_data_Jan")
|
|
3440
|
+
Feature 'sales_data_Jan' is archived.
|
|
3441
|
+
True
|
|
3442
|
+
|
|
3443
|
+
# Delete Feature with name "sales_data_Jan".
|
|
3444
|
+
>>> fs.delete_feature(feature="sales_data_Jan")
|
|
3445
|
+
Feature 'sales_data_Jan' is deleted.
|
|
3446
|
+
True
|
|
3447
|
+
"""
|
|
3448
|
+
self._logger.info(f"Deleting feature '{feature}' from repository '{self.__repo}', data_domain '{self.__data_domain}'")
|
|
3449
|
+
return self.__remove_obj(name=feature, type_="feature", action="delete")
|
|
3450
|
+
|
|
3451
|
+
def archive_entity(self, entity):
|
|
3452
|
+
"""
|
|
3453
|
+
DESCRIPTION:
|
|
3454
|
+
Archives Entity from repository. Note that archived Entity
|
|
3455
|
+
is not available for any further processing. Archived Entity can be
|
|
3456
|
+
viewed using "list_entities(archived=True)" method.
|
|
3457
|
+
|
|
3458
|
+
PARAMETERS:
|
|
3459
|
+
entity:
|
|
3460
|
+
Required Argument.
|
|
3461
|
+
Specifies either the name of Entity or Object of Entity
|
|
3462
|
+
to remove from repository.
|
|
3463
|
+
Types: str OR Entity
|
|
3464
|
+
|
|
3465
|
+
RETURNS:
|
|
3466
|
+
bool.
|
|
3467
|
+
|
|
3468
|
+
RAISES:
|
|
3469
|
+
TeradataMLException, TypeError, ValueError
|
|
3470
|
+
|
|
3471
|
+
EXAMPLES:
|
|
3472
|
+
>>> from teradataml import DataFrame, Entity, FeatureStore
|
|
3473
|
+
# Create teradataml DataFrame.
|
|
3474
|
+
>>> load_example_data('dataframe', ['sales'])
|
|
3475
|
+
>>> df = DataFrame("sales")
|
|
3476
|
+
|
|
3477
|
+
# Create FeatureStore for repo 'vfs_v1'.
|
|
3478
|
+
>>> fs = FeatureStore("vfs_v1")
|
|
3479
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
3480
|
+
# Setup FeatureStore for this repository.
|
|
3481
|
+
>>> fs.setup()
|
|
3482
|
+
True
|
|
3483
|
+
|
|
3484
|
+
# Example 1: Archive the Entity 'sales_data' in the repo 'vfs_v1' using Entity name.
|
|
3485
|
+
# Create Entity using teradataml DataFrame Column.
|
|
3486
|
+
>>> entity = Entity(name="sales_data", columns=df.accounts)
|
|
3487
|
+
# Apply the entity to FeatureStore.
|
|
3488
|
+
>>> fs.apply(entity)
|
|
3489
|
+
True
|
|
3490
|
+
|
|
3491
|
+
# List all the available entities.
|
|
3492
|
+
>>> fs.list_entities()
|
|
3493
|
+
description creation_time modified_time entity_column
|
|
3494
|
+
name data_domain
|
|
3495
|
+
sales_data ALICE None 2025-07-28 04:54:34.687139 None accounts
|
|
3496
|
+
|
|
3497
|
+
# Archive Entity with name "sales_data".
|
|
3498
|
+
>>> fs.archive_entity(entity=entity.name)
|
|
3499
|
+
Entity 'sales_data' is archived.
|
|
3500
|
+
True
|
|
3501
|
+
|
|
3502
|
+
# List the entities after archive.
|
|
3503
|
+
>>> fs.list_entities(archived=True)
|
|
3504
|
+
name data_domain description creation_time modified_time archived_time entity_column
|
|
3505
|
+
0 sales_data ALICE None 2025-07-28 04:54:34.687139 None 2025-07-28 04:55:46.750000 accounts
|
|
3506
|
+
|
|
3507
|
+
# Example 2: Archive the Entity 'sales_data_df' in the repo 'vfs_v1' using Entity object.
|
|
3508
|
+
# Create Entity using teradataml DataFrame Column.
|
|
3509
|
+
>>> entity2 = Entity(name="sales_data_df", columns=df.accounts)
|
|
3510
|
+
# Apply the entity to FeatureStore.
|
|
3511
|
+
>>> fs.apply(entity2)
|
|
3512
|
+
True
|
|
3513
|
+
|
|
3514
|
+
# Archive Entity with Entity object.
|
|
3515
|
+
>>> fs.archive_entity(entity=entity2)
|
|
3516
|
+
Entity 'sales_data_df' is archived.
|
|
3517
|
+
True
|
|
3518
|
+
|
|
3519
|
+
# List the entities after archive.
|
|
3520
|
+
>>> fs.list_entities(archived=True)
|
|
3521
|
+
name data_domain description creation_time modified_time archived_time entity_column
|
|
3522
|
+
0 sales_data ALICE None 2025-07-28 04:54:34.687139 None 2025-07-28 04:55:46.750000 accounts
|
|
3523
|
+
1 sales_data_df ALICE None 2025-07-28 04:56:01.123456 None 2025-07-28 04:57:35.456789 accounts
|
|
3524
|
+
|
|
3525
|
+
"""
|
|
3526
|
+
self._logger.info(f"Archiving entity '{entity}' from repository '{self.__repo}', data_domain '{self.__data_domain}'")
|
|
3527
|
+
return self.__remove_obj(name=entity, type_="entity")
|
|
3528
|
+
|
|
3529
|
+
def delete_entity(self, entity):
|
|
3530
|
+
"""
|
|
3531
|
+
DESCRIPTION:
|
|
3532
|
+
Removes archived Entity from repository.
|
|
3533
|
+
|
|
3534
|
+
PARAMETERS:
|
|
3535
|
+
entity:
|
|
3536
|
+
Required Argument.
|
|
3537
|
+
Specifies either the name of Entity or Object of Entity
|
|
3538
|
+
to delete from repository.
|
|
3539
|
+
Types: str OR Entity
|
|
3540
|
+
|
|
3541
|
+
RETURNS:
|
|
3542
|
+
bool.
|
|
3543
|
+
|
|
3544
|
+
RAISES:
|
|
3545
|
+
TeradataMLException, TypeError, ValueError
|
|
3546
|
+
|
|
3547
|
+
EXAMPLES:
|
|
3548
|
+
>>> from teradataml import DataFrame, Entity, FeatureStore
|
|
3549
|
+
# Create teradataml DataFrame.
|
|
3550
|
+
>>> load_example_data('dataframe', ['sales'])
|
|
3551
|
+
>>> df = DataFrame("sales")
|
|
3552
|
+
|
|
3553
|
+
# Create FeatureStore for repo 'vfs_v1'.
|
|
3554
|
+
>>> fs = FeatureStore("vfs_v1")
|
|
3555
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
3556
|
+
# Setup FeatureStore for this repository.
|
|
3557
|
+
>>> fs.setup()
|
|
3558
|
+
True
|
|
3559
|
+
|
|
3560
|
+
# Example 1: Delete the Entity 'sales_data' in the repo 'vfs_v1' using Entity name.
|
|
3561
|
+
# Create Entity using teradataml DataFrame Column.
|
|
3562
|
+
>>> entity = Entity(name="sales_data", columns=df.accounts)
|
|
3563
|
+
# Apply the entity to FeatureStore.
|
|
3564
|
+
>>> fs.apply(entity)
|
|
3565
|
+
True
|
|
3566
|
+
|
|
3567
|
+
# List all the available entities.
|
|
3568
|
+
>>> fs.list_entities()
|
|
3569
|
+
description creation_time modified_time entity_column
|
|
3570
|
+
name data_domain
|
|
3571
|
+
sales_data ALICE None 2025-07-28 04:58:01.123456 None accounts
|
|
3572
|
+
|
|
3573
|
+
# Let's first archive the entity.
|
|
3574
|
+
>>> fs.archive_entity(entity=entity.name)
|
|
3575
|
+
Entity 'sales_data' is archived.
|
|
3576
|
+
True
|
|
3577
|
+
|
|
3578
|
+
# Delete Entity with name "sales_data".
|
|
3579
|
+
>>> fs.delete_entity(entity=entity.name)
|
|
3580
|
+
Entity 'sales_data' is deleted.
|
|
3581
|
+
True
|
|
3582
|
+
|
|
3583
|
+
# List the entities after delete.
|
|
3584
|
+
>>> fs.list_entities()
|
|
3585
|
+
Empty DataFrame
|
|
3586
|
+
Columns: [id, column_name, description, tags, data_type, feature_type, status, creation_time, modified_time, group_name]
|
|
3587
|
+
Index: []
|
|
3588
|
+
|
|
3589
|
+
# Example 2: Delete the Entity 'sales_data_df' in the repo 'vfs_v1' using Entity object.
|
|
3590
|
+
# Create Entity using teradataml DataFrame Column.
|
|
3591
|
+
>>> entity2 = Entity(name="sales_data_df", columns=df.accounts)
|
|
3592
|
+
# Apply the entity to FeatureStore.
|
|
3593
|
+
>>> fs.apply(entity2)
|
|
3594
|
+
True
|
|
3595
|
+
|
|
3596
|
+
# List all the available entities.
|
|
3597
|
+
>>> fs.list_entities()
|
|
3598
|
+
description creation_time modified_time entity_column
|
|
3599
|
+
name data_domain
|
|
3600
|
+
sales_data_df ALICE None 2025-07-28 04:59:14.325456 None accounts
|
|
3601
|
+
|
|
3602
|
+
# Let's first archive the entity.
|
|
3603
|
+
>>> fs.archive_entity(entity=entity2)
|
|
3604
|
+
Entity 'sales_data_df' is archived.
|
|
3605
|
+
True
|
|
3606
|
+
|
|
3607
|
+
# Delete Entity with Entity object.
|
|
3608
|
+
>>> fs.delete_entity(entity=entity2)
|
|
3609
|
+
Entity 'sales_data_df' is deleted.
|
|
3610
|
+
True
|
|
3611
|
+
"""
|
|
3612
|
+
self._logger.info(f"Deleting entity '{entity}' from repository '{self.__repo}', data_domain '{self.__data_domain}'")
|
|
3613
|
+
return self.__remove_obj(name=entity, type_="entity", action="delete")
|
|
3614
|
+
|
|
3615
|
+
def __get_features_where_clause(self, features):
|
|
3616
|
+
"""
|
|
3617
|
+
Internal function to prepare a where clause on features df.
|
|
3618
|
+
"""
|
|
3619
|
+
col_expr = Col("name") == features[0]
|
|
3620
|
+
for feature in features[1:]:
|
|
3621
|
+
col_expr = ((col_expr) | (Col("name") == feature))
|
|
3622
|
+
col_expr = col_expr & (Col("data_domain") == self.__data_domain)
|
|
3623
|
+
return col_expr
|
|
3624
|
+
|
|
3625
|
+
def archive_feature_group(self, feature_group):
|
|
3626
|
+
"""
|
|
3627
|
+
DESCRIPTION:
|
|
3628
|
+
Archives FeatureGroup from repository. Note that archived FeatureGroup
|
|
3629
|
+
is not available for any further processing. Archived FeatureGroup can be
|
|
3630
|
+
viewed using "list_feature_groups(archived=True)" method.
|
|
3631
|
+
Note:
|
|
3632
|
+
The function archives the associated Features, Entity and DataSource
|
|
3633
|
+
if they are not associated with any other FeatureGroups.
|
|
3634
|
+
|
|
3635
|
+
PARAMETERS:
|
|
3636
|
+
feature_group:
|
|
3637
|
+
Required Argument.
|
|
3638
|
+
Specifies either the name of FeatureGroup or Object of FeatureGroup
|
|
3639
|
+
to archive from repository.
|
|
3640
|
+
Types: str OR FeatureGroup
|
|
3641
|
+
|
|
3642
|
+
RETURNS:
|
|
3643
|
+
bool.
|
|
3644
|
+
|
|
3645
|
+
RAISES:
|
|
3646
|
+
TeradataMLException, TypeError, ValueError
|
|
3647
|
+
|
|
3648
|
+
EXAMPLES:
|
|
3649
|
+
>>> from teradataml import DataFrame, FeatureGroup, FeatureStore
|
|
3650
|
+
# Create teradataml DataFrame.
|
|
3651
|
+
>>> load_example_data('dataframe', ['sales'])
|
|
3652
|
+
>>> df = DataFrame("sales")
|
|
3653
|
+
|
|
3654
|
+
# Create FeatureStore for repo 'vfs_v1'.
|
|
3655
|
+
>>> fs = FeatureStore("vfs_v1", data_domain="d1")
|
|
3656
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
3657
|
+
# Setup FeatureStore for this repository.
|
|
3658
|
+
>>> fs.setup()
|
|
3659
|
+
True
|
|
3660
|
+
|
|
3661
|
+
# Example 1: Archive the FeatureGroup 'sales' in the repo 'vfs_v1' using FeatureGroup name.
|
|
3662
|
+
# Create FeatureGroup from teradataml DataFrame.
|
|
3663
|
+
>>> fg = FeatureGroup.from_DataFrame(name="sales", entity_columns="accounts", df=df, timestamp_column="datetime")
|
|
3664
|
+
# Apply FeatureGroup to FeatureStore.
|
|
3665
|
+
>>> fs.apply(fg)
|
|
3666
|
+
True
|
|
3667
|
+
|
|
3668
|
+
# List all the available FeatureGroups.
|
|
3669
|
+
>>> fs.list_feature_groups()
|
|
3670
|
+
description data_source_name entity_name creation_time modified_time
|
|
3671
|
+
name data_domain
|
|
3672
|
+
sales d1 None sales sales 2025-07-28 05:00:19.780453 None
|
|
3673
|
+
|
|
3674
|
+
# Archive FeatureGroup with name "sales".
|
|
3675
|
+
>>> fs.archive_feature_group(feature_group='sales')
|
|
3676
|
+
FeatureGroup 'sales' is archived.
|
|
3677
|
+
True
|
|
3678
|
+
|
|
3679
|
+
# List all the available FeatureGroups after archive.
|
|
3680
|
+
>>> fs.list_feature_groups(archived=True)
|
|
3681
|
+
name data_domain description data_source_name entity_name creation_time modified_time archived_time
|
|
3682
|
+
0 sales d1 None sales sales 2025-07-28 05:00:19.780453 None 2025-07-28 05:02:04.100000
|
|
3683
|
+
|
|
3684
|
+
# Example 2: Archive the FeatureGroup 'sales' in the repo 'vfs_v1' using FeatureGroup object.
|
|
3685
|
+
# Create FeatureGroup from teradataml DataFrame.
|
|
3686
|
+
>>> fg2 = FeatureGroup.from_DataFrame(name="sales_df", entity_columns="accounts", df=df, timestamp_column="datetime")
|
|
3687
|
+
# Apply FeatureGroup to FeatureStore.
|
|
3688
|
+
>>> fs.apply(fg2)
|
|
3689
|
+
True
|
|
3690
|
+
|
|
3691
|
+
# Archive FeatureGroup with FeatureGroup object.
|
|
3692
|
+
>>> fs.archive_feature_group(feature_group=fg2)
|
|
3693
|
+
FeatureGroup 'sales_df' is archived.
|
|
3694
|
+
True
|
|
3695
|
+
|
|
3696
|
+
# List all the available FeatureGroups after archive.
|
|
3697
|
+
>>> fs.list_feature_groups(archived=True)
|
|
3698
|
+
name data_domain description data_source_name entity_name creation_time modified_time archived_time
|
|
3699
|
+
0 sales d1 None sales sales 2025-07-28 05:00:19.780453 None 2025-07-28 05:02:04.100000
|
|
3700
|
+
1 sales_df d1 None sales sales 2025-07-28 05:02:01.123456 None 2025-07-28 05:03:35.456789
|
|
3701
|
+
"""
|
|
3702
|
+
self._logger.info(f"Archiving feature group '{feature_group}' from repository '{self.__repo}', data_domain '{self.__data_domain}'")
|
|
3703
|
+
|
|
3704
|
+
argument_validation_params = []
|
|
3705
|
+
argument_validation_params.append(["feature_group", feature_group, False, (str, FeatureGroup), True])
|
|
3706
|
+
|
|
3707
|
+
# Validate argument types
|
|
3708
|
+
_Validators._validate_function_arguments(argument_validation_params)
|
|
3709
|
+
|
|
3710
|
+
feature_group_name = feature_group if isinstance(feature_group, str) else feature_group.name
|
|
3711
|
+
|
|
3712
|
+
stg_table = _FeatureStoreDFContainer.get_df("feature_group_staging", self.__repo, self.__data_domain)
|
|
3713
|
+
stg_table = stg_table[stg_table.name == feature_group_name]
|
|
3714
|
+
if stg_table.shape[0] > 0:
|
|
3715
|
+
print("{} '{}' is already archived.".format('FeatureGroup', feature_group_name))
|
|
3716
|
+
return False
|
|
3717
|
+
|
|
3718
|
+
# Check if FeatureGroup is related to any FeatureProcess
|
|
3719
|
+
feature_process_df = self.list_feature_processes()
|
|
3720
|
+
related_processes = feature_process_df[(feature_process_df['data_source'] == feature_group_name)]
|
|
3721
|
+
|
|
3722
|
+
if related_processes.shape[0] > 0:
|
|
3723
|
+
process_ids = [fp.process_id for fp in related_processes.itertuples()]
|
|
3724
|
+
related_process_ids = "feature process(es) {}".format(process_ids)
|
|
3725
|
+
err_code = MessageCodes.EFS_OBJ_IN_FEATURE_PROCESS
|
|
3726
|
+
err_msg = Messages.get_message(err_code,
|
|
3727
|
+
'FeatureGroup',
|
|
3728
|
+
feature_group_name,
|
|
3729
|
+
related_process_ids,
|
|
3730
|
+
"feature process(es)",
|
|
3731
|
+
"FeatureStore.archive_feature_process() and FeatureStore.delete_feature_process()",
|
|
3732
|
+
)
|
|
3733
|
+
raise TeradataMlException(err_msg, err_code)
|
|
3734
|
+
|
|
3735
|
+
fg = self.get_feature_group(feature_group_name) if isinstance(feature_group, str) else feature_group
|
|
3736
|
+
|
|
3737
|
+
fg_df = self.list_feature_groups()
|
|
3738
|
+
|
|
3739
|
+
# Find out shared Features. Extract the features which are mapped to
|
|
3740
|
+
# other groups. They cannot be deleted.
|
|
3741
|
+
feature_names = [f.name for f in fg.features]
|
|
3742
|
+
features_df = self.list_features()
|
|
3743
|
+
col_expr = self.__get_features_where_clause(feature_names)
|
|
3744
|
+
features_df = features_df[((features_df.group_name != fg.name) & (col_expr))]
|
|
3745
|
+
shared_features = [f.name for f in features_df.drop_duplicate('name').itertuples()]
|
|
3746
|
+
feature_names_to_remove = [f for f in feature_names if f not in shared_features]
|
|
3747
|
+
|
|
3748
|
+
# Find out shared Entities. If the entity is not shared, then set 'entity_name'
|
|
3749
|
+
# so that the entity is archived along with the group.
|
|
3750
|
+
entity_name = None
|
|
3751
|
+
ent = fg_df[((fg_df.entity_name == fg.entity.name) & (fg_df.name != fg.name))]
|
|
3752
|
+
recs = ent.shape[0]
|
|
3753
|
+
if recs == 0:
|
|
3754
|
+
entity_name = fg.entity.name
|
|
3755
|
+
|
|
3756
|
+
# Find out shared DataSources. If datasource is not shared, then update 'data_source_name'.
|
|
3757
|
+
data_source_name = None
|
|
3758
|
+
ds_df = fg_df[((fg_df.data_source_name == fg.data_source.name) & (fg_df.name != fg.name))]
|
|
3759
|
+
recs = ds_df.shape[0]
|
|
3760
|
+
if recs == 0:
|
|
3761
|
+
data_source_name = fg.data_source.name
|
|
3762
|
+
|
|
3763
|
+
res = self._archive_feature_group(fg.name, feature_names_to_remove, entity_name, data_source_name)
|
|
3764
|
+
|
|
3765
|
+
if res == 1:
|
|
3766
|
+
print("FeatureGroup '{}' is archived.".format(feature_group_name))
|
|
3767
|
+
return True
|
|
3768
|
+
|
|
3769
|
+
print("FeatureGroup '{}' not exist to archive.".format(feature_group_name))
|
|
3770
|
+
return False
|
|
3771
|
+
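# Illustrative sketch only: the shared-feature handling above can be read in
# plain Python terms. Assuming 'feature_names' holds this group's features and
# 'other_group_features' holds the features mapped to any other group:
#
#     shared_features = set(feature_names) & set(other_group_features)
#     feature_names_to_remove = [f for f in feature_names if f not in shared_features]
#
# Only 'feature_names_to_remove' is passed on for archival; shared features,
# and likewise shared entities and data sources, are left untouched.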
|
|
3772
|
+
@db_transaction
|
|
3773
|
+
def _archive_feature_group(self, group_name, feature_names, entity_name, data_source_name):
|
|
3774
|
+
"""
|
|
3775
|
+
DESCRIPTION:
|
|
3776
|
+
Internal method to archive FeatureGroup from repository.
|
|
3777
|
+
|
|
3778
|
+
PARAMETERS:
|
|
3779
|
+
group_name:
|
|
3780
|
+
Required Argument.
|
|
3781
|
+
Specifies the name of FeatureGroup to archive from repository.
|
|
3782
|
+
Types: str
|
|
3783
|
+
|
|
3784
|
+
feature_names:
|
|
3785
|
+
Required Argument.
|
|
3786
|
+
Specifies the name of Features to archive from repository.
|
|
3787
|
+
Types: list
|
|
3788
|
+
|
|
3789
|
+
entity_name:
|
|
3790
|
+
Required Argument.
|
|
3791
|
+
Specifies the name of Entity to archive from repository.
|
|
3792
|
+
Types: str
|
|
3793
|
+
|
|
3794
|
+
data_source_name:
|
|
3795
|
+
Required Argument.
|
|
3796
|
+
Specifies the name of DataSource to archive from repository.
|
|
3797
|
+
Types: str
|
|
3798
|
+
|
|
3799
|
+
RETURNS:
|
|
3800
|
+
bool.
|
|
3801
|
+
|
|
3802
|
+
RAISES:
|
|
3803
|
+
OperationalError
|
|
3804
|
+
|
|
3805
|
+
EXAMPLES:
|
|
3806
|
+
>>> self._archive_feature_group("group1", ["feature1"], "entity_name", None)
|
|
3807
|
+
"""
|
|
3808
|
+
# Remove data for FeatureGroup from Xref table.
|
|
3809
|
+
# Removing these rows first allows data in the other tables to be removed.
|
|
3810
|
+
res = _delete_data(schema_name=self.__repo,
|
|
3811
|
+
table_name=self.__table_names["group_features"],
|
|
3812
|
+
delete_conditions=(Col("group_name") == group_name) &
|
|
3813
|
+
(Col("group_data_domain") == self.__data_domain)
|
|
3814
|
+
)
|
|
3815
|
+
|
|
3816
|
+
# Remove FeatureGroup.
|
|
3817
|
+
res = _delete_data(schema_name=self.__repo,
|
|
3818
|
+
table_name=self.__table_names["feature_group"],
|
|
3819
|
+
delete_conditions=(Col("name") == group_name) &
|
|
3820
|
+
(Col("data_domain") == self.__data_domain)
|
|
3821
|
+
)
|
|
3822
|
+
|
|
3823
|
+
# Remove Features.
|
|
3824
|
+
if feature_names:
|
|
3825
|
+
_delete_data(schema_name=self.__repo,
|
|
3826
|
+
table_name=self.__table_names["feature"],
|
|
3827
|
+
delete_conditions=self.__get_features_where_clause(feature_names)
|
|
3828
|
+
)
|
|
3829
|
+
|
|
3830
|
+
# Remove entities.
|
|
3831
|
+
if entity_name:
|
|
3832
|
+
_delete_data(schema_name=self.__repo,
|
|
3833
|
+
table_name=self.__table_names["entity_xref"],
|
|
3834
|
+
delete_conditions=(Col("entity_name") == entity_name) &
|
|
3835
|
+
(Col("data_domain") == self.__data_domain)
|
|
3836
|
+
)
|
|
3837
|
+
|
|
3838
|
+
_delete_data(schema_name=self.__repo,
|
|
3839
|
+
table_name=self.__table_names["entity"],
|
|
3840
|
+
delete_conditions=(Col("name") == entity_name) &
|
|
3841
|
+
(Col("data_domain") == self.__data_domain)
|
|
3842
|
+
)
|
|
3843
|
+
|
|
3844
|
+
# Remove DataSource.
|
|
3845
|
+
if data_source_name:
|
|
3846
|
+
_delete_data(schema_name=self.__repo,
|
|
3847
|
+
table_name=self.__table_names["data_source"],
|
|
3848
|
+
delete_conditions=(Col("name") == data_source_name) &
|
|
3849
|
+
(Col("data_domain") == self.__data_domain)
|
|
3850
|
+
)
|
|
3851
|
+
|
|
3852
|
+
return res
|
|
3853
|
+
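# Illustrative sketch only: assuming '_delete_data' issues ordinary DELETE
# statements, the archive above amounts to the following order, removing the
# cross-reference rows first so the remaining rows can be deleted cleanly.
# Table and value names below are placeholders.
#
#     DELETE FROM "<repo>"."<group_features>" WHERE group_name = '<fg>' AND group_data_domain = '<domain>';
#     DELETE FROM "<repo>"."<feature_group>"  WHERE name = '<fg>' AND data_domain = '<domain>';
#     -- then the features, the entity (xref first), and the data source,
#     -- each only when it is not shared with another FeatureGroup.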
|
|
3854
|
+
@db_transaction
|
|
3855
|
+
def delete_feature_group(self, feature_group):
|
|
3856
|
+
"""
|
|
3857
|
+
DESCRIPTION:
|
|
3858
|
+
Removes archived FeatureGroup from repository.
|
|
3859
|
+
Note:
|
|
3860
|
+
Unlike 'archive_feature_group()', this function does not delete the
|
|
3861
|
+
associated Features, Entity and DataSource. One should delete those
|
|
3862
|
+
using 'delete_feature()', 'delete_entity()' and 'delete_data_source()'.
|
|
3863
|
+
|
|
3864
|
+
PARAMETERS:
|
|
3865
|
+
feature_group:
|
|
3866
|
+
Required Argument.
|
|
3867
|
+
Specifies either the name of FeatureGroup or Object of FeatureGroup
|
|
3868
|
+
to delete from repository.
|
|
3869
|
+
Types: str OR FeatureGroup
|
|
3870
|
+
|
|
3871
|
+
RETURNS:
|
|
3872
|
+
bool
|
|
3873
|
+
|
|
3874
|
+
RAISES:
|
|
3875
|
+
TeradataMLException, TypeError, ValueError
|
|
3876
|
+
|
|
3877
|
+
EXAMPLES:
|
|
3878
|
+
>>> from teradataml import DataFrame, FeatureGroup, FeatureStore
|
|
3879
|
+
# Create teradataml DataFrame.
|
|
3880
|
+
>>> load_example_data('dataframe', ['sales'])
|
|
3881
|
+
>>> df = DataFrame("sales")
|
|
3882
|
+
|
|
3883
|
+
# Create FeatureStore for repo 'vfs_v1'.
|
|
3884
|
+
>>> fs = FeatureStore("vfs_v1", data_domain="d1")
|
|
3885
|
+
Repo vfs_v1 does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
3886
|
+
# Setup FeatureStore for this repository.
|
|
3887
|
+
>>> fs.setup()
|
|
3888
|
+
True
|
|
3889
|
+
|
|
3890
|
+
# Example 1: Delete the FeatureGroup 'sales' in the repo 'vfs_v1' using FeatureGroup name.
|
|
3891
|
+
# Create FeatureGroup from teradataml DataFrame.
|
|
3892
|
+
>>> fg = FeatureGroup.from_DataFrame(name="sales", entity_columns="accounts", df=df, timestamp_column="datetime")
|
|
3893
|
+
# Apply FeatureGroup to FeatureStore.
|
|
3894
|
+
>>> fs.apply(fg)
|
|
3895
|
+
True
|
|
3896
|
+
|
|
3897
|
+
# List all the available FeatureGroups.
|
|
3898
|
+
>>> fs.list_feature_groups()
|
|
3899
|
+
description data_source_name entity_name creation_time modified_time
|
|
3900
|
+
name data_domain
|
|
3901
|
+
sales d1 None sales sales 2025-07-28 05:00:19.780453 None
|
|
3902
|
+
|
|
3903
|
+
# Archive FeatureGroup with name "sales".
|
|
3904
|
+
>>> fs.archive_feature_group(feature_group='sales')
|
|
3905
|
+
FeatureGroup 'sales' is archived.
|
|
3906
|
+
True
|
|
3907
|
+
|
|
3908
|
+
# Delete FeatureGroup with name "sales".
|
|
3909
|
+
>>> fs.delete_feature_group(feature_group='sales')
|
|
3910
|
+
FeatureGroup 'sales' is deleted.
|
|
3911
|
+
True
|
|
3912
|
+
|
|
3913
|
+
# List all the available FeatureGroups after delete.
|
|
3914
|
+
>>> fs.list_feature_groups()
|
|
3915
|
+
Empty DataFrame
|
|
3916
|
+
Columns: [description, data_source_name, entity_name, creation_time, modified_time]
|
|
3917
|
+
Index: []
|
|
3918
|
+
|
|
3919
|
+
Example 2: Delete the FeatureGroup 'sales' in the repo 'vfs_v1' using FeatureGroup object.
|
|
3920
|
+
# Create FeatureGroup from teradataml DataFrame.
|
|
3921
|
+
>>> fg2 = FeatureGroup.from_DataFrame(name="sales", entity_columns="accounts", df=df, timestamp_column="datetime")
|
|
3922
|
+
# Apply FeatureGroup to FeatureStore.
|
|
3923
|
+
>>> fs.apply(fg2)
|
|
3924
|
+
True
|
|
3925
|
+
|
|
3926
|
+
# Archive FeatureGroup with FeatureGroup object.
|
|
3927
|
+
>>> fs.archive_feature_group(feature_group=fg2)
|
|
3928
|
+
FeatureGroup 'sales' is archived.
|
|
3929
|
+
True
|
|
3930
|
+
|
|
3931
|
+
# Delete FeatureGroup with FeatureGroup object.
|
|
3932
|
+
>>> fs.delete_feature_group(feature_group=fg2)
|
|
3933
|
+
FeatureGroup 'sales' is deleted.
|
|
3934
|
+
True
|
|
3935
|
+
"""
|
|
3936
|
+
self._logger.info(f"Deleting feature group '{feature_group}' from repository '{self.__repo}', data_domain '{self.__data_domain}'")
|
|
3937
|
+
|
|
3938
|
+
argument_validation_params = []
|
|
3939
|
+
argument_validation_params.append(["feature_group", feature_group, False, (str, FeatureGroup), True])
|
|
3940
|
+
|
|
3941
|
+
# Validate argument types
|
|
3942
|
+
_Validators._validate_function_arguments(argument_validation_params)
|
|
3943
|
+
|
|
3944
|
+
fg_name = feature_group if isinstance(feature_group, str) else feature_group.name
|
|
3945
|
+
|
|
3946
|
+
# Validation for delete action - ensure FeatureGroup is already archived
|
|
3947
|
+
main_fg_df = self.__get_feature_group_df()
|
|
3948
|
+
existing_records = main_fg_df[main_fg_df["name"] == fg_name]
|
|
3949
|
+
|
|
3950
|
+
if existing_records.shape[0] > 0:
|
|
3951
|
+
error_code = MessageCodes.EFS_DELETE_BEFORE_ARCHIVE
|
|
3952
|
+
error_msg = Messages.get_message(
|
|
3953
|
+
error_code,
|
|
3954
|
+
'FeatureGroup',
|
|
3955
|
+
fg_name,
|
|
3956
|
+
'feature_group')
|
|
3957
|
+
raise TeradataMlException(error_msg, error_code)
|
|
3958
|
+
|
|
3959
|
+
# Remove data for FeatureGroup.
|
|
3960
|
+
_delete_data(table_name=self.__table_names["group_features_staging"],
|
|
3961
|
+
schema_name=self.__repo,
|
|
3962
|
+
delete_conditions=(Col("group_name") == fg_name) &
|
|
3963
|
+
(Col("group_data_domain") == self.__data_domain)
|
|
3964
|
+
)
|
|
3965
|
+
|
|
3966
|
+
res = _delete_data(table_name=self.__table_names["feature_group_staging"],
|
|
3967
|
+
schema_name=self.__repo,
|
|
3968
|
+
delete_conditions=(Col("name") == fg_name) &
|
|
3969
|
+
(Col("data_domain") == self.__data_domain)
|
|
3970
|
+
)
|
|
3971
|
+
|
|
3972
|
+
if res == 1:
|
|
3973
|
+
print("FeatureGroup '{}' is deleted.".format(fg_name))
|
|
3974
|
+
return True
|
|
3975
|
+
|
|
3976
|
+
print("FeatureGroup '{}' does not exist to delete.".format(fg_name))
|
|
3977
|
+
return False
|
|
3978
|
+
|
|
3979
|
+
@property
|
|
3980
|
+
def version(self):
|
|
3981
|
+
"""
|
|
3982
|
+
DESCRIPTION:
|
|
3983
|
+
Get the FeatureStore version.
|
|
3984
|
+
|
|
3985
|
+
PARAMETERS:
|
|
3986
|
+
None
|
|
3987
|
+
|
|
3988
|
+
RETURNS:
|
|
3989
|
+
str
|
|
3990
|
+
|
|
3991
|
+
RAISES:
|
|
3992
|
+
None
|
|
3993
|
+
|
|
3994
|
+
EXAMPLES:
|
|
3995
|
+
# Example 1: Get the version of the FeatureStore for
|
|
3996
|
+
# the repo 'vfs_v1'.
|
|
3997
|
+
>>> from teradataml import FeatureStore
|
|
3998
|
+
>>> fs = FeatureStore('vfs_v1')
|
|
3999
|
+
FeatureStore is ready to use.
|
|
4000
|
+
|
|
4001
|
+
# Get the version of FeatureStore.
|
|
4002
|
+
>>> fs.version
|
|
4003
|
+
'2.0.0'
|
|
4004
|
+
"""
|
|
4005
|
+
self._logger.debug(f"Accessing version property for repository '{self.__repo}'")
|
|
4006
|
+
if self.__version is None:
|
|
4007
|
+
self.__version = self.__get_version()
|
|
4008
|
+
self._logger.debug(f"Retrieved FeatureStore version: {self.__version}")
|
|
4009
|
+
return self.__version
|
|
4010
|
+
|
|
4011
|
+
def list_feature_catalogs(self) -> DataFrame:
|
|
4012
|
+
"""
|
|
4013
|
+
DESCRIPTION:
|
|
4014
|
+
Lists all the feature catalogs.
|
|
4015
|
+
|
|
4016
|
+
PARAMETERS:
|
|
4017
|
+
None
|
|
4018
|
+
|
|
4019
|
+
RETURNS:
|
|
4020
|
+
teradataml DataFrame
|
|
4021
|
+
|
|
4022
|
+
RAISES:
|
|
4023
|
+
None
|
|
4024
|
+
|
|
4025
|
+
EXAMPLES:
|
|
4026
|
+
# Example 1: List all the feature catalogs in the repo 'vfs_v1'.
|
|
4027
|
+
>>> from teradataml import FeatureStore
|
|
4028
|
+
|
|
4029
|
+
# Create FeatureStore for the repo 'vfs_v1' or use existing one.
|
|
4030
|
+
>>> fs = FeatureStore("vfs_v1")
|
|
4031
|
+
FeatureStore is ready to use.
|
|
4032
|
+
|
|
4033
|
+
# Load the sales data.
|
|
4034
|
+
>>> load_example_data("dataframe", "sales")
|
|
4035
|
+
>>> df = DataFrame("sales")
|
|
4036
|
+
|
|
4037
|
+
# Create a feature process.
|
|
4038
|
+
>>> from teradataml import FeatureProcess
|
|
4039
|
+
>>> fp = FeatureProcess(repo="vfs_v1",
|
|
4040
|
+
... data_domain='sales',
|
|
4041
|
+
... object=df,
|
|
4042
|
+
... entity="accounts",
|
|
4043
|
+
... features=["Jan", "Feb", "Mar", "Apr"])
|
|
4044
|
+
>>> fp.run()
|
|
4045
|
+
Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' started.
|
|
4046
|
+
Process '5747082b-4acb-11f0-a2d7-f020ffe7fe09' completed.
|
|
4047
|
+
|
|
4048
|
+
# List all the feature catalogs in the repo 'vfs_v1'.
|
|
4049
|
+
>>> fs.list_feature_catalogs()
|
|
4050
|
+
data_domain feature_id table_name valid_start valid_end
|
|
4051
|
+
entity_name
|
|
4052
|
+
accounts sales 2 FS_T_918e1cb4_c6bc_6d38_634d_7b9fe53e2a63 2025-06-16 16:02:49.481245+00: 9999-12-31 23:59:59.999999+00:
|
|
4053
|
+
accounts sales 100001 FS_T_e84ff803_3d5c_4793_cd72_251c780fffe4 2025-06-16 16:02:49.481245+00: 9999-12-31 23:59:59.999999+00:
|
|
4054
|
+
accounts sales 1 FS_T_918e1cb4_c6bc_6d38_634d_7b9fe53e2a63 2025-06-16 16:02:49.481245+00: 9999-12-31 23:59:59.999999+00:
|
|
4055
|
+
accounts sales 200001 FS_T_918e1cb4_c6bc_6d38_634d_7b9fe53e2a63 2025-06-16 16:02:49.481245+00: 9999-12-31 23:59:59.999999+00:
|
|
4056
|
+
"""
|
|
4057
|
+
self._logger.info(f"Listing feature catalogs from repository '{self.__repo}', data_domain '{self.__data_domain}'")
|
|
4058
|
+
df = self.__get_without_valid_period_df(self.__get_features_metadata_df())
|
|
4059
|
+
self._logger.debug(f"Retrieved feature catalogs:\n{df}")
|
|
4060
|
+
return df
|
|
4061
|
+
|
|
4062
|
+
def archive_feature_process(self, process_id):
|
|
4063
|
+
"""
|
|
4064
|
+
DESCRIPTION:
|
|
4065
|
+
Archives the FeatureProcess with the given process_id.
|
|
4066
|
+
Notes:
|
|
4067
|
+
* Archived FeatureProcess is not available for any further processing.
|
|
4068
|
+
* Archived FeatureProcess can be viewed using the `FeatureStore.list_feature_processes(archived=True)`
|
|
4069
|
+
method.
|
|
4070
|
+
* The same feature can be ingested by multiple processes. If a feature associated with
|
|
4071
|
+
process "process_id" is also associated with other processes, then this
|
|
4072
|
+
function only archives the feature values associated with the process "process_id". Else
|
|
4073
|
+
it archives the feature from the feature catalog. Look at `FeatureCatalog.archive_features()`
|
|
4074
|
+
for more details.
|
|
4075
|
+
|
|
4076
|
+
PARAMETERS:
|
|
4077
|
+
process_id:
|
|
4078
|
+
Required Argument.
|
|
4079
|
+
Specifies the ID of the FeatureProcess to archive from repository.
|
|
4080
|
+
Types: str
|
|
4081
|
+
|
|
4082
|
+
RETURNS:
|
|
4083
|
+
bool
|
|
4084
|
+
|
|
4085
|
+
RAISES:
|
|
4086
|
+
TeradataMLException, TypeError, ValueError
|
|
4087
|
+
|
|
4088
|
+
EXAMPLES:
|
|
4089
|
+
>>> load_example_data('dataframe', ['sales'])
|
|
4090
|
+
# Create a teradataml DataFrame.
|
|
4091
|
+
>>> from teradataml import DataFrame, FeatureProcess, FeatureStore
|
|
4092
|
+
>>> df = DataFrame("sales")
|
|
4093
|
+
|
|
4094
|
+
# Create FeatureStore for repo 'repo'.
|
|
4095
|
+
>>> fs = FeatureStore("repo", data_domain='sales')
|
|
4096
|
+
Repo repo does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
4097
|
+
# Setup FeatureStore for this repository.
|
|
4098
|
+
>>> fs.setup()
|
|
4099
|
+
True
|
|
4100
|
+
|
|
4101
|
+
# Run FeatureProcess to ingest features.
|
|
4102
|
+
>>> from teradataml import FeatureProcess
|
|
4103
|
+
>>> fp = FeatureProcess(repo='repo',
|
|
4104
|
+
... data_domain='sales',
|
|
4105
|
+
... object=df,
|
|
4106
|
+
... entity='accounts',
|
|
4107
|
+
... features=['Jan', 'Feb', 'Mar', 'Apr'])
|
|
4108
|
+
>>> fp.run()
|
|
4109
|
+
Process '2a014f2d-6b71-11f0-aeda-f020ffe7fe09' started.
|
|
4110
|
+
Process '2a014f2d-6b71-11f0-aeda-f020ffe7fe09' completed.
|
|
4111
|
+
|
|
4112
|
+
# List the available FeatureProcesses.
|
|
4113
|
+
>>> fs.list_feature_processes()
|
|
4114
|
+
description data_domain process_type data_source entity_id feature_names feature_ids valid_start valid_end
|
|
4115
|
+
process_id
|
|
4116
|
+
2a014f2d-6b71-11f0-aeda-f020ffe7fe09 sales denormalized view "sales" accounts Apr, Feb, Jan, Mar None 2025-07-28 05:10:34.760000+00: 9999-12-31 23:59:59.999999+00:
|
|
4117
|
+
|
|
4118
|
+
# Example: Archive the FeatureProcess with process_id '2a014f2d-6b71-11f0-aeda-f020ffe7fe09'.
|
|
4119
|
+
>>> fs.archive_feature_process("2a014f2d-6b71-11f0-aeda-f020ffe7fe09")
|
|
4120
|
+
Feature 'Jan' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
|
|
4121
|
+
Feature 'Jan' is archived from metadata.
|
|
4122
|
+
Feature 'Feb' is archived from table 'FS_T_6003dc24_375e_7fd6_46f0_eeb868305c4a'.
|
|
4123
|
+
Feature 'Feb' is archived from metadata.
|
|
4124
|
+
Feature 'Mar' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
|
|
4125
|
+
Feature 'Mar' is archived from metadata.
|
|
4126
|
+
Feature 'Apr' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
|
|
4127
|
+
Feature 'Apr' is archived from metadata.
|
|
4128
|
+
FeatureProcess with process id '2a014f2d-6b71-11f0-aeda-f020ffe7fe09' is archived.
|
|
4129
|
+
True
|
|
4130
|
+
"""
|
|
4131
|
+
argument_validation_params = []
|
|
4132
|
+
argument_validation_params.append(["process_id", process_id, True, str, True])
|
|
4133
|
+
|
|
4134
|
+
# Validate argument types
|
|
4135
|
+
_Validators._validate_function_arguments(argument_validation_params)
|
|
4136
|
+
|
|
4137
|
+
features = self.__validate_feature_process(process_id)
|
|
4138
|
+
if features is False:
|
|
4139
|
+
return False
|
|
4140
|
+
|
|
4141
|
+
feature_details = FeatureCatalog._get_feature_details(
|
|
4142
|
+
self.__repo, self.__data_domain, features)
|
|
4143
|
+
|
|
4144
|
+
# Get the shared features.
|
|
4145
|
+
shared_features = FeatureCatalog._get_shared_features(self.__repo, self.__data_domain)
|
|
4146
|
+
|
|
4147
|
+
# Remove the features from the feature metadata table.
|
|
4148
|
+
return self.__remove_feature_process(
|
|
4149
|
+
process_id, features, feature_details, shared_features)
|
|
4150
|
+
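# Illustrative sketch only: the docstring notes above amount to this decision,
# where 'process_features' are the features ingested by this process and
# 'shared_features' are those also ingested by other processes (both assumed
# to be plain collections of feature names):
#
#     def plan_archive(process_features, shared_features):
#         values_only = [f for f in process_features if f in shared_features]
#         whole_feature = [f for f in process_features if f not in shared_features]
#         # values_only   -> archive only this process's feature values
#         # whole_feature -> archive the feature itself from the feature catalog
#         return values_only, whole_feature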
|
|
4151
|
+
def delete_feature_process(self, process_id):
|
|
4152
|
+
"""
|
|
4153
|
+
DESCRIPTION:
|
|
4154
|
+
Deletes the archived feature process from feature store with the given process_id.
|
|
4155
|
+
Notes:
|
|
4156
|
+
* One feature can be ingested by multiple processes. If a feature associated with
|
|
4157
|
+
process "process_id" is also ingested by other processes, then "delete_feature_process()"
|
|
4158
|
+
function only deletes the feature values associated with the process "process_id". Else
|
|
4159
|
+
it deletes the feature from the feature catalog. Look at 'FeatureCatalog.delete_features()'
|
|
4160
|
+
for more details.
|
|
4161
|
+
|
|
4162
|
+
PARAMETERS:
|
|
4163
|
+
process_id:
|
|
4164
|
+
Required Argument.
|
|
4165
|
+
Specifies the ID of the FeatureProcess to delete from repository.
|
|
4166
|
+
Types: str
|
|
4167
|
+
|
|
4168
|
+
RETURNS:
|
|
4169
|
+
bool
|
|
4170
|
+
|
|
4171
|
+
RAISES:
|
|
4172
|
+
TeradataMLException, TypeError, ValueError
|
|
4173
|
+
|
|
4174
|
+
EXAMPLES:
|
|
4175
|
+
>>> load_example_data('dataframe', ['sales'])
|
|
4176
|
+
# Create a teradataml DataFrame.
|
|
4177
|
+
>>> from teradataml import DataFrame, FeatureProcess, FeatureStore
|
|
4178
|
+
>>> df = DataFrame("sales")
|
|
4179
|
+
|
|
4180
|
+
# Create FeatureStore for repo 'repo'.
|
|
4181
|
+
>>> fs = FeatureStore("repo", data_domain='sales')
|
|
4182
|
+
Repo repo does not exist. Run FeatureStore.setup() to create the repo and setup FeatureStore.
|
|
4183
|
+
# Setup FeatureStore for this repository.
|
|
4184
|
+
>>> fs.setup()
|
|
4185
|
+
True
|
|
4186
|
+
|
|
4187
|
+
# Run FeatureProcess to ingest features.
|
|
4188
|
+
>>> from teradataml import FeatureProcess
|
|
4189
|
+
>>> fp = FeatureProcess(repo='repo',
|
|
4190
|
+
... data_domain='sales',
|
|
4191
|
+
... object=df,
|
|
4192
|
+
... entity='accounts',
|
|
4193
|
+
... features=['Jan', 'Feb', 'Mar', 'Apr'])
|
|
4194
|
+
>>> fp.run()
|
|
4195
|
+
Process '2a014f2d-6b71-11f0-aeda-f020ffe7fe09' started.
|
|
4196
|
+
Process '2a014f2d-6b71-11f0-aeda-f020ffe7fe09' completed.
|
|
4197
|
+
|
|
4198
|
+
# List the available FeatureProcesses.
|
|
4199
|
+
>>> fs.list_feature_processes()
|
|
4200
|
+
description data_domain process_type data_source entity_id feature_names feature_ids valid_start valid_end
|
|
4201
|
+
process_id
|
|
4202
|
+
2a014f2d-6b71-11f0-aeda-f020ffe7fe09 sales denormalized view "sales" accounts Apr, Feb, Jan, Mar None 2025-07-28 05:10:34.760000+00: 9999-12-31 23:59:59.999999+00:
|
|
4203
|
+
|
|
4204
|
+
# Example: Archive the FeatureProcess with process_id '2a014f2d-6b71-11f0-aeda-f020ffe7fe09'.
|
|
4205
|
+
>>> fs.archive_feature_process("2a014f2d-6b71-11f0-aeda-f020ffe7fe09")
|
|
4206
|
+
Feature 'Jan' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
|
|
4207
|
+
Feature 'Jan' is archived from metadata.
|
|
4208
|
+
Feature 'Feb' is archived from table 'FS_T_6003dc24_375e_7fd6_46f0_eeb868305c4a'.
|
|
4209
|
+
Feature 'Feb' is archived from metadata.
|
|
4210
|
+
Feature 'Mar' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
|
|
4211
|
+
Feature 'Mar' is archived from metadata.
|
|
4212
|
+
Feature 'Apr' is archived from table 'FS_T_a38baff6_821b_3bb7_0850_827fe5372e31'.
|
|
4213
|
+
Feature 'Apr' is archived from metadata.
|
|
4214
|
+
FeatureProcess with process id '2a014f2d-6b71-11f0-aeda-f020ffe7fe09' is archived.
|
|
4215
|
+
True
|
|
4216
|
+
|
|
4217
|
+
# Example: Delete the FeatureProcess with process_id '2a014f2d-6b71-11f0-aeda-f020ffe7fe09'.
|
|
4218
|
+
>>> fs.delete_feature_process('2a014f2d-6b71-11f0-aeda-f020ffe7fe09')
|
|
4219
|
+
Feature 'Feb' deleted successfully from table 'FS_T_e84ff803_3d5c_4793_cd72_251c780fffe4'.
|
|
4220
|
+
Feature 'Jan' deleted successfully from table 'FS_T_918e1cb4_c6bc_6d38_634d_7b9fe53e2a63'.
|
|
4221
|
+
Feature 'Mar' deleted successfully from table 'FS_T_918e1cb4_c6bc_6d38_634d_7b9fe53e2a63'.
|
|
4222
|
+
Feature 'Apr' deleted successfully from table 'FS_T_918e1cb4_c6bc_6d38_634d_7b9fe53e2a63'.
|
|
4223
|
+
FeatureProcess with process_id '2a014f2d-6b71-11f0-aeda-f020ffe7fe09' is deleted.
|
|
4224
|
+
True
|
|
4225
|
+
|
|
4226
|
+
# List the available FeatureProcesses after delete.
|
|
4227
|
+
>>> fs.list_feature_processes()
|
|
4228
|
+
Empty DataFrame
|
|
4229
|
+
Columns: [description, data_domain, process_type, data_source, entity_id, feature_names, feature_ids, valid_start, valid_end]
|
|
4230
|
+
Index: []
|
|
4231
|
+
"""
|
|
4232
|
+
argument_validation_params = []
|
|
4233
|
+
argument_validation_params.append(["process_id", process_id, True, str, True])
|
|
4234
|
+
|
|
4235
|
+
# Validate argument types
|
|
4236
|
+
_Validators._validate_function_arguments(argument_validation_params)
|
|
4237
|
+
|
|
4238
|
+
# Before deleting, check whether the specified process id exists and is archived.
|
|
4239
|
+
features = self.__validate_feature_process(process_id, type_='delete')
|
|
4240
|
+
if features is False:
|
|
4241
|
+
return False
|
|
4242
|
+
|
|
4243
|
+
feature_details = FeatureCatalog._get_feature_details(
|
|
4244
|
+
self.__repo, self.__data_domain, features)
|
|
4245
|
+
|
|
4246
|
+
# Get the shared features.
|
|
4247
|
+
shared_features = FeatureCatalog._get_shared_features(self.__repo, self.__data_domain)
|
|
4248
|
+
|
|
4249
|
+
return self.__remove_feature_process(
|
|
4250
|
+
process_id, features, feature_details, shared_features, type_='delete')
|
|
4251
|
+
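# Illustrative usage sketch only (the process id below is the placeholder from
# the docstring examples): delete_feature_process() accepts only processes that
# were archived first, so a full cleanup is the two-step sequence:
#
#     >>> fs.archive_feature_process("2a014f2d-6b71-11f0-aeda-f020ffe7fe09")
#     >>> fs.delete_feature_process("2a014f2d-6b71-11f0-aeda-f020ffe7fe09")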
|
|
4252
|
+
@db_transaction
|
|
4253
|
+
def __remove_feature_process(self,
|
|
4254
|
+
process_id,
|
|
4255
|
+
process_features,
|
|
4256
|
+
feature_details,
|
|
4257
|
+
shared_features,
|
|
4258
|
+
type_='archive'):
|
|
4259
|
+
"""
|
|
4260
|
+
DESCRIPTION:
|
|
4261
|
+
Internal function to remove the FeatureProcess from repository.
|
|
4262
|
+
It also removes the associated features from the feature table.
|
|
4263
|
+
|
|
4264
|
+
PARAMETERS:
|
|
4265
|
+
process_id:
|
|
4266
|
+
Required Argument.
|
|
4267
|
+
Specifies the ID of the FeatureProcess to remove from repository.
|
|
4268
|
+
Types: str
|
|
4269
|
+
|
|
4270
|
+
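process_features:
Required Argument.
Specifies the names of the features ingested by the process "process_id",
as collected from the feature process records.
Types: set
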
feature_details:
|
|
4271
|
+
Required Argument.
|
|
4272
|
+
Specifies the list of features to remove from repository.
|
|
4273
|
+
Types: list of namedtuple
|
|
4274
|
+
|
|
4275
|
+
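shared_features:
Required Argument.
Specifies the features that are also ingested by other feature processes,
as returned by "FeatureCatalog._get_shared_features()".
Types: list
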
type_:
|
|
4276
|
+
Optional Argument.
|
|
4277
|
+
Specifies the type of removal. Allowed values are 'archive' and 'delete'.
|
|
4278
|
+
Default value is 'archive'.
|
|
4279
|
+
Types: str
|
|
4280
|
+
|
|
4281
|
+
RETURNS:
|
|
4282
|
+
bool
|
|
4283
|
+
|
|
4284
|
+
RAISES:
|
|
4285
|
+
None
|
|
4286
|
+
|
|
4287
|
+
EXAMPLES:
|
|
4288
|
+
>>> self.__remove_feature_process("5747082b-4acb-11f0-a2d7-f020ffe7fe09",
|
|
4289
|
+
... process_features=[namedtuple('feature_', ['name', 'id', 'table_name'])('sales_data_Feb', 1, 'FS_T_12345')],
|
|
4290
|
+
... type_='archive')
|
|
4291
|
+
"""
|
|
4292
|
+
self._logger.info(f"Removing feature process '{process_id}' from repository '{self.__repo}', action: {type_}")
|
|
4293
|
+
|
|
4294
|
+
temporal_clause = 'CURRENT VALIDTIME'
|
|
4295
|
+
delete_condition = (Col("process_id") == process_id)
|
|
4296
|
+
if type_ == 'delete':
|
|
4297
|
+
temporal_clause = None
|
|
4298
|
+
|
|
4299
|
+
self._logger.debug(f"Removing {len(process_features)} features from feature catalog for process '{process_id}'")
|
|
4300
|
+
fc = FeatureCatalog(self.__repo, self.__data_domain)
|
|
4301
|
+
res1 = fc._remove_features(process_features, feature_details, type_=='archive', shared_features, process_id)
|
|
4302
|
+
|
|
4303
|
+
# Remove it from feature process table.
|
|
4304
|
+
self._logger.debug(f"Removing process '{process_id}' from feature_process table")
|
|
4305
|
+
res = _delete_data(table_name=self.__table_names["feature_process"],
|
|
4306
|
+
schema_name=self.__repo,
|
|
4307
|
+
delete_conditions=delete_condition,
|
|
4308
|
+
temporal_clause=temporal_clause
|
|
4309
|
+
)
|
|
4310
|
+
|
|
4311
|
+
if res >= 1:
|
|
4312
|
+
print("FeatureProcess with process id '{}' is {}d.".format(process_id, type_))
|
|
4313
|
+
return res1 & True
|
|
4314
|
+
|
|
4315
|
+
print("FeatureProcess with process id '{}' does not exist to {}.".format(process_id, type_))
|
|
4316
|
+
return res1 & False
|
|
4317
|
+
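# Illustrative sketch only: assuming the feature_process table is a valid-time
# temporal table (which is why 'CURRENT VALIDTIME' is used above), the two
# removal modes differ only in the temporal qualifier handed to '_delete_data'.
# Table and id values below are placeholders.
#
#     -- type_='archive': close out the current rows, history is preserved
#     CURRENT VALIDTIME DELETE FROM "<repo>"."<feature_process>" WHERE process_id = '<id>';
#     -- type_='delete': remove the rows outright
#     DELETE FROM "<repo>"."<feature_process>" WHERE process_id = '<id>';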
|
|
4318
|
+
def __validate_feature_process(self, process_id, type_='archive'):
|
|
4319
|
+
"""
|
|
4320
|
+
DESCRIPTION:
|
|
4321
|
+
Internal function to validate whether the feature process exists or not.
|
|
4322
|
+
Also, the function checks if the process is archived or not.
|
|
4323
|
+
|
|
4324
|
+
PARAMETERS:
|
|
4325
|
+
process_id:
|
|
4326
|
+
Required Argument.
|
|
4327
|
+
Specifies the ID of the FeatureProcess to validate.
|
|
4328
|
+
Types: str
|
|
4329
|
+
|
|
4330
|
+
type_:
|
|
4331
|
+
Optional Argument.
|
|
4332
|
+
Specifies the type of validation. Allowed values are 'archive' and 'delete'.
|
|
4333
|
+
Default value is 'archive'.
|
|
4334
|
+
Types: str
|
|
4335
|
+
|
|
4336
|
+
RETURNS:
|
|
4337
|
+
list or bool.
|
|
4338
|
+
False if the process does not exist or is already archived.
|
|
4339
|
+
list of feature names if all validations pass.
|
|
4340
|
+
|
|
4341
|
+
RAISES:
|
|
4342
|
+
TeradataMLException
|
|
4343
|
+
|
|
4344
|
+
EXAMPLES:
|
|
4345
|
+
>>> # Validate the feature process with process_id '5747082b-4acb-11f0-a2d7-f020ffe7fe09'.
|
|
4346
|
+
>>> fs.__validate_feature_process(process_id='5747082b-4acb-11f0-a2d7-f020ffe7fe09')
|
|
4347
|
+
{'sales_data_Jan', 'sales_data_Feb', 'sales_data_Mar', 'sales_data_Apr'}
|
|
4348
|
+
"""
|
|
4349
|
+
# Extract process type, data source, entity_id, feature_names from given process id.
|
|
4350
|
+
sql = EFS_ARCHIVED_RECORDS.format("feature_names",
|
|
4351
|
+
'"{}"."{}"'.format(self.__repo,
|
|
4352
|
+
self.__table_names["feature_process"]),
|
|
4353
|
+
"PROCESS_ID = '{}' AND DATA_DOMAIN = '{}'".
|
|
4354
|
+
format(process_id, self.__data_domain))
|
|
4355
|
+
|
|
4356
|
+
feature_names = set()
|
|
4357
|
+
all_archived = True
|
|
4358
|
+
any_one_not_archived = False
|
|
4359
|
+
for rec in execute_sql(sql):
|
|
4360
|
+
is_archived = rec[1] == 1
|
|
4361
|
+
all_archived = all_archived and is_archived
|
|
4362
|
+
any_one_not_archived = any_one_not_archived or (not is_archived)
|
|
4363
|
+
feature_names.update([f.strip() for f in rec[0].split(",")])
|
|
4364
|
+
|
|
4365
|
+
# Not raising error to align with the behavior of other methods.
|
|
4366
|
+
if not feature_names:
|
|
4367
|
+
print("FeatureProcess with process id '{}' does not exist.".format(process_id))
|
|
4368
|
+
return False
|
|
4369
|
+
|
|
4370
|
+
# Check if feature is already archived or not.
|
|
4371
|
+
if type_ == 'archive' and all_archived:
|
|
4372
|
+
# All records' valid end dates should be less than the current timestamp in that case.
|
|
4373
|
+
print("FeatureProcess with process id '{}' is already archived.".format(process_id))
|
|
4374
|
+
return False
|
|
4375
|
+
|
|
4376
|
+
# For delete, check if the process is archived or not first.
|
|
4377
|
+
if type_ == 'delete' and any_one_not_archived:
|
|
4378
|
+
print("FeatureProcess with process id '{}' is not archived. "
|
|
4379
|
+
"First archive the process and then delete it.".format(process_id))
|
|
4380
|
+
return False
|
|
4381
|
+
|
|
4382
|
+
# Check if feature is associated with any dataset or not.
|
|
4383
|
+
dataset_features_df = self.__get_dataset_features_df()
|
|
4384
|
+
# Validate the feature names.
|
|
4385
|
+
_Validators._validate_features_not_in_efs_dataset(
|
|
4386
|
+
df=dataset_features_df[(dataset_features_df['data_domain'] == self.__data_domain)],
|
|
4387
|
+
feature_names=list(feature_names),
|
|
4388
|
+
action='archived')
|
|
4389
|
+
|
|
4390
|
+
return feature_names
|
|
4391
|
+
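# Illustrative sketch only: the loop above reduces the per-record archived
# flags (rec[1]) to two booleans. Assuming 'records' is the fetched list of
# (feature_names, flag) rows, the same result with built-ins would be:
#
#     flags = [rec[1] == 1 for rec in records]
#     all_archived = all(flags)
#     any_one_not_archived = any(not f for f in flags)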
|
|
4392
|
+
def remove_data_domain(self):
|
|
4393
|
+
"""
|
|
4394
|
+
DESCRIPTION:
|
|
4395
|
+
Removes the data domain from the FeatureStore and all associated objects.
|
|
4396
|
+
|
|
4397
|
+
Notes:
|
|
4398
|
+
* This operation permanently deletes all objects, tables, and views tied to the data domain.
|
|
4399
|
+
* There is no archival or built-in recovery; all deletions are irreversible.
|
|
4400
|
+
|
|
4401
|
+
PARAMETERS:
|
|
4402
|
+
None
|
|
4403
|
+
|
|
4404
|
+
RETURNS:
|
|
4405
|
+
bool
|
|
4406
|
+
|
|
4407
|
+
RAISES:
|
|
4408
|
+
TeradataMLException
|
|
4409
|
+
|
|
4410
|
+
EXAMPLES:
|
|
4411
|
+
>>> from teradataml import FeatureStore
|
|
4412
|
+
# Create a new FeatureStore or use an existing one.
|
|
4413
|
+
>>> fs = FeatureStore("repo", data_domain="sales")
|
|
4414
|
+
FeatureStore is ready to use.
|
|
4415
|
+
|
|
4416
|
+
# Remove the data domain 'sales' and all associated objects.
|
|
4417
|
+
>>> fs.remove_data_domain()
|
|
4418
|
+
The function will remove the data domain 'sales' and all associated objects. Are you sure you want to proceed? (Y/N): Y
|
|
4419
|
+
Data domain 'sales' is removed from the FeatureStore.
|
|
4420
|
+
True
|
|
4421
|
+
"""
|
|
4422
|
+
self._logger.info(f"Removing data domain '{self.__data_domain}' from repository '{self.__repo}'")
|
|
4423
|
+
|
|
4424
|
+
confirmation = input("The function will remove the data domain '{}' and" \
|
|
4425
|
+
" all associated objects. Are you sure you want to proceed? (Y/N): ".format(self.__data_domain))
|
|
4426
|
+
|
|
4427
|
+
if confirmation not in ["Y", "y"]:
|
|
4428
|
+
self._logger.info(f"Data domain removal cancelled by user")
|
|
4429
|
+
return False
|
|
4430
|
+
|
|
4431
|
+
# Get the views to drop related to the data domain.
|
|
4432
|
+
dataset_features_df = self.__get_dataset_features_df()
|
|
4433
|
+
filtered_dataset_features_df = dataset_features_df[dataset_features_df['data_domain'] == self.__data_domain].itertuples()
|
|
4434
|
+
views_to_drop = list({rec.feature_view for rec in filtered_dataset_features_df})
|
|
4435
|
+
|
|
4436
|
+
# Get the tables to drop related to the data domain.
|
|
4437
|
+
features_metadata_df = self.__get_features_metadata_df()
|
|
4438
|
+
filtered_features_metadata_df = features_metadata_df[features_metadata_df['data_domain'] == self.__data_domain].itertuples()
|
|
4439
|
+
tables_to_drop = list({rec.table_name for rec in filtered_features_metadata_df})
|
|
4440
|
+
|
|
4441
|
+
res = db_transaction(self.__remove_data_domain)()
|
|
4442
|
+
|
|
4443
|
+
# Drop the views related to the data domain.
|
|
4444
|
+
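# Note (assumption): the dataset views are expected to reference the feature
# tables, so views are dropped before tables to avoid drops failing on
# dependent objects.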
for view in views_to_drop:
|
|
4445
|
+
try:
|
|
4446
|
+
execute_sql(f"DROP VIEW {_get_quoted_object_name(schema_name=self.__repo, object_name=view)}")
|
|
4447
|
+
except Exception as e:
|
|
4448
|
+
print(f"Error dropping view {view}: {e}")
|
|
4449
|
+
# Drop the tables related to the data domain.
|
|
4450
|
+
for table in tables_to_drop:
|
|
4451
|
+
try:
|
|
4452
|
+
execute_sql(f"DROP TABLE {_get_quoted_object_name(schema_name=self.__repo, object_name=table)}")
|
|
4453
|
+
except Exception as e:
|
|
4454
|
+
print(f"Error dropping table {table}: {e}")
|
|
4455
|
+
|
|
4456
|
+
return True
|
|
4457
|
+
|
|
4458
|
+
def __remove_data_domain(self):
|
|
4459
|
+
"""
|
|
4460
|
+
DESCRIPTION:
|
|
4461
|
+
Internal method to remove the data domain from the FeatureStore and all associated objects.
|
|
4462
|
+
|
|
4463
|
+
PARAMETERS:
|
|
4464
|
+
None
|
|
4465
|
+
|
|
4466
|
+
RETURNS:
|
|
4467
|
+
bool
|
|
4468
|
+
|
|
4469
|
+
RAISES:
|
|
4470
|
+
TeradataMLException
|
|
4471
|
+
|
|
4472
|
+
EXAMPLES:
|
|
4473
|
+
>>> fs.__remove_data_domain()
|
|
4474
|
+
"""
|
|
4475
|
+
self._logger.debug(f"Starting removal of data domain '{self.__data_domain}' from repository '{self.__repo}'")
|
|
4476
|
+
# To remove a data domain from the FeatureStore, we need to:
|
|
4477
|
+
# 1. Remove data domain entries from the dataset catalog and dataset features.
|
|
4478
|
+
# 2. Remove data domain entries from the feature metadata.
|
|
4479
|
+
# 3. Remove data domain entries from the feature processes.
|
|
4480
|
+
# 4. Remove data_domain entries from feature groups, group features, and their staging tables.
|
|
4481
|
+
# 5. Remove data_domain entries from features and their staging tables.
|
|
4482
|
+
# 6. Remove data_domain entries from entities, entity xref, and their staging tables.
|
|
4483
|
+
# 7. Remove data_domain entries from data sources and their staging tables.
|
|
4484
|
+
# 8. Remove data_domain entries from data_domain table.
|
|
4485
|
+
|
|
4486
|
+
# 1. Remove data domain entries from the dataset catalog and dataset features.
|
|
4487
|
+
self._logger.debug(f"Removing data domain '{self.__data_domain}' entries from dataset catalog and dataset features")
|
|
4488
|
+
_delete_data(
|
|
4489
|
+
table_name=self.__table_names['dataset_catalog'],
|
|
4490
|
+
schema_name=self.__repo,
|
|
4491
|
+
delete_conditions=(Col("data_domain") == self.__data_domain)
|
|
4492
|
+
)
|
|
4493
|
+
self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from dataset features")
|
|
4494
|
+
|
|
4495
|
+
_delete_data(
|
|
4496
|
+
table_name=self.__table_names['dataset_features'],
|
|
4497
|
+
schema_name=self.__repo,
|
|
4498
|
+
delete_conditions=(Col("data_domain") == self.__data_domain)
|
|
4499
|
+
)
|
|
4500
|
+
self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from dataset features")
|
|
4501
|
+
|
|
4502
|
+
# 2. Remove data domain entries from the feature metadata.
|
|
4503
|
+
_delete_data(
|
|
4504
|
+
table_name=self.__table_names['feature_metadata'],
|
|
4505
|
+
schema_name=self.__repo,
|
|
4506
|
+
delete_conditions=(Col("data_domain") == self.__data_domain)
|
|
4507
|
+
)
|
|
4508
|
+
self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from feature metadata")
|
|
4509
|
+
|
|
4510
|
+
# 3. Remove data_domain entries from the feature processes.
|
|
4511
|
+
_delete_data(
|
|
4512
|
+
table_name=self.__table_names['feature_process'],
|
|
4513
|
+
schema_name=self.__repo,
|
|
4514
|
+
delete_conditions=(Col("data_domain") == self.__data_domain)
|
|
4515
|
+
)
|
|
4516
|
+
self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from feature processes")
|
|
4517
|
+
|
|
4518
|
+
# 4. Remove data_domain entries from feature groups, group features, and their staging tables.
|
|
4519
|
+
_delete_data(
|
|
4520
|
+
table_name=self.__table_names['group_features'],
|
|
4521
|
+
schema_name=self.__repo,
|
|
4522
|
+
delete_conditions=((Col("group_data_domain") == self.__data_domain))
|
|
4523
|
+
)
|
|
4524
|
+
self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from group features")
|
|
4525
|
+
|
|
4526
|
+
_delete_data(
|
|
4527
|
+
table_name=self.__table_names['feature_group'],
|
|
4528
|
+
schema_name=self.__repo,
|
|
4529
|
+
delete_conditions=(Col("data_domain") == self.__data_domain)
|
|
4530
|
+
)
|
|
4531
|
+
self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from feature groups")
|
|
4532
|
+
|
|
4533
|
+
_delete_data(
|
|
4534
|
+
table_name=self.__table_names["group_features_staging"],
|
|
4535
|
+
schema_name=self.__repo,
|
|
4536
|
+
delete_conditions=(Col("group_data_domain") == self.__data_domain))
|
|
4537
|
+
self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from group features staging")
|
|
4538
|
+
|
|
4539
|
+
_delete_data(
|
|
4540
|
+
table_name=self.__table_names["feature_group_staging"],
|
|
4541
|
+
schema_name=self.__repo,
|
|
4542
|
+
delete_conditions=(Col("data_domain") == self.__data_domain)
|
|
4543
|
+
)
|
|
4544
|
+
self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from feature groups")
|
|
4545
|
+
|
|
4546
|
+
# 5. Remove data_domain entries from features and their staging tables.
|
|
4547
|
+
_delete_data(
|
|
4548
|
+
table_name=self.__table_names['feature'],
|
|
4549
|
+
schema_name=self.__repo,
|
|
4550
|
+
delete_conditions=(Col("data_domain") == self.data_domain)
|
|
4551
|
+
)
|
|
4552
|
+
self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from features")
|
|
4553
|
+
|
|
4554
|
+
_delete_data(
|
|
4555
|
+
table_name=self.__table_names['feature_staging'],
|
|
4556
|
+
schema_name=self.__repo,
|
|
4557
|
+
delete_conditions=(Col("data_domain") == self.data_domain)
|
|
4558
|
+
)
|
|
4559
|
+
self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from feature staging tables")
|
|
4560
|
+
|
|
4561
|
+
# 6. Remove data_domain entries from entities, entity xref, and their staging tables.
|
|
4562
|
+
_delete_data(
|
|
4563
|
+
table_name=self.__table_names['entity_xref'],
|
|
4564
|
+
schema_name=self.__repo,
|
|
4565
|
+
delete_conditions=(Col("data_domain") == self.data_domain)
|
|
4566
|
+
)
|
|
4567
|
+
self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from entity xref")
|
|
4568
|
+
|
|
4569
|
+
_delete_data(
|
|
4570
|
+
table_name=self.__table_names['entity'],
|
|
4571
|
+
schema_name=self.__repo,
|
|
4572
|
+
delete_conditions=(Col("data_domain") == self.data_domain)
|
|
4573
|
+
)
|
|
4574
|
+
self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from entities")
|
|
4575
|
+
|
|
4576
|
+
_delete_data(
|
|
4577
|
+
table_name=self.__table_names['entity_staging'],
|
|
4578
|
+
schema_name=self.__repo,
|
|
4579
|
+
delete_conditions=(Col("data_domain") == self.data_domain)
|
|
4580
|
+
)
|
|
4581
|
+
self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from entity staging tables")
|
|
4582
|
+
|
|
4583
|
+
_delete_data(
|
|
4584
|
+
table_name=self.__table_names['entity_staging_xref'],
|
|
4585
|
+
schema_name=self.__repo,
|
|
4586
|
+
delete_conditions=(Col("data_domain") == self.data_domain)
|
|
4587
|
+
)
|
|
4588
|
+
self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from entity staging xref tables")
|
|
4589
|
+
|
|
4590
|
+
# 7. Remove data_domain entries from data sources and their staging tables.
|
|
4591
|
+
_delete_data(
|
|
4592
|
+
table_name=self.__table_names['data_source'],
|
|
4593
|
+
schema_name=self.__repo,
|
|
4594
|
+
delete_conditions=(Col("data_domain") == self.data_domain)
|
|
4595
|
+
)
|
|
4596
|
+
self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from data sources")
|
|
4597
|
+
|
|
4598
|
+
_delete_data(
|
|
4599
|
+
table_name=self.__table_names['data_source_staging'],
|
|
4600
|
+
schema_name=self.__repo,
|
|
4601
|
+
delete_conditions=(Col("data_domain") == self.data_domain)
|
|
4602
|
+
)
|
|
4603
|
+
self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from data source staging tables")
|
|
4604
|
+
|
|
4605
|
+
# 8. Remove data_domain entries from data_domain table.
|
|
4606
|
+
_delete_data(
|
|
4607
|
+
table_name=self.__table_names['data_domain'],
|
|
4608
|
+
schema_name=self.__repo,
|
|
4609
|
+
delete_conditions=(Col("name") == self.__data_domain)
|
|
4610
|
+
)
|
|
4611
|
+
self._logger.debug(f"Removed data domain '{self.__data_domain}' entries from data domain table")
|
|
4612
|
+
|
|
4613
|
+
self._logger.debug(f"Successfully completed removal of data domain '{self.__data_domain}' from repository '{self.__repo}'")
|
|
4614
|
+
print(f"Data domain '{self.__data_domain}' is removed from the FeatureStore.")
|
|
4615
|
+
return True
|
|
4616
|
+
|
|
4617
|
+
def mind_map(self, feature_process=None):
|
|
4618
|
+
"""
|
|
4619
|
+
DESCRIPTION:
|
|
4620
|
+
Returns a visual mind map of the FeatureStore, showing data sources,
|
|
4621
|
+
feature processes, feature catalog, and dataset catalog, with dependencies
|
|
4622
|
+
illustrated by curves.
|
|
4623
|
+
Note:
|
|
4624
|
+
Works only in Jupyter Notebook or similar environments that support HTML rendering.
|
|
4625
|
+
|
|
4626
|
+
PARAMETERS:
|
|
4627
|
+
feature_process:
|
|
4628
|
+
Optional Argument.
|
|
4629
|
+
Specifies the feature process to filter the mind map. When specified,
|
|
4630
|
+
only the feature process and its related data sources, features, and datasets
|
|
4631
|
+
are displayed.
|
|
4632
|
+
Notes:
|
|
4633
|
+
* mind_map() displays only the features that are associated with the
|
|
4634
|
+
feature process for the datasets also. For example, if Dataset is associated
|
|
4635
|
+
with Feature1, Feature2 and Feature1 is ingested by FeatureProcess1 and
|
|
4636
|
+
Feature2 is ingested by FeatureProcess2, then mind_map() displays the
|
|
4637
|
+
Dataset with Feature1 only if "feature_process" is set to FeatureProcess1.
|
|
4638
|
+
* If "feature_process" is not specified, then mind_map() displays all the
|
|
4639
|
+
feature processes, data sources, features, and datasets in the FeatureStore.
|
|
4640
|
+
Types: str OR list of str
|
|
4641
|
+
|
|
4642
|
+
RETURNS:
|
|
4643
|
+
None (displays HTML visualization)
|
|
4644
|
+
|
|
4645
|
+
RAISES:
|
|
4646
|
+
TypeError
|
|
4647
|
+
|
|
4648
|
+
EXAMPLES:
|
|
4649
|
+
# Example 1: Display the mind map of the FeatureStore with all feature processes.
|
|
4650
|
+
>>> from teradataml import DataFrame, FeatureStore
|
|
4651
|
+
>>> load_example_data("dataframe", "sales")
|
|
4652
|
+
# Create DataFrame.
|
|
4653
|
+
>>> sales_df = DataFrame("sales")
|
|
4654
|
+
>>> admissions_df = DataFrame("admissions")
|
|
4655
|
+
|
|
4656
|
+
# Create a FeatureStore for the repo 'vfs_v1'.
|
|
4657
|
+
>>> fs = FeatureStore("vfs_v1", data_domain='Analytics')
|
|
4658
|
+
FeatureStore is ready to use.
|
|
4659
|
+
|
|
4660
|
+
# Create a feature process to ingest sales df.
|
|
4661
|
+
>>> fp1 = fs.get_feature_process(object=sales_df,
|
|
4662
|
+
... features=['Jan', 'Feb', 'Mar', 'Apr'],
|
|
4663
|
+
... entity='accounts')
|
|
4664
|
+
>>> fp1.run()
|
|
4665
|
+
Process '7b9f76d6-562c-11f0-bb98-c934b24a960f' started.
|
|
4666
|
+
Process '7b9f76d6-562c-11f0-bb98-c934b24a960f' completed.
|
|
4667
|
+
True
|
|
4668
|
+
|
|
4669
|
+
# Create a feature process to ingest admissions df.
|
|
4670
|
+
>>> fp2 = fs.get_feature_process(object=admissions_df,
|
|
4671
|
+
... features=[ 'masters', 'gpa', 'stats', 'programming', 'admitted'],
|
|
4672
|
+
... entity='id')
|
|
4673
|
+
>>> fp2.run()
|
|
4674
|
+
Process 'a5de0230-6b8e-11f0-ae70-f020ffe7fe09' started.
|
|
4675
|
+
Process 'a5de0230-6b8e-11f0-ae70-f020ffe7fe09' completed.
|
|
4676
|
+
|
|
4677
|
+
# Example 1: Display the mind map of the FeatureStore.
|
|
4678
|
+
>>> fs.mind_map()
|
|
4679
|
+
|
|
4680
|
+
# Example 2: Display the mind map of the FeatureStore for the sales feature process.
|
|
4681
|
+
>>> fs.mind_map(feature_process=fp1.process_id)
|
|
4682
|
+
|
|
4683
|
+
# Example 3: Display the mind map of the FeatureStore for admissions features.
|
|
4684
|
+
>>> fs.mind_map(feature_process=fp2.process_id)
|
|
4685
|
+
|
|
4686
|
+
# Example 4: Display the mind map of the FeatureStore for both sales and admissions feature
|
|
4687
|
+
# processes.
|
|
4688
|
+
>>> fs.mind_map(feature_process=[fp1.process_id, fp2.process_id])
|
|
4689
|
+
"""
|
|
4690
|
+
# Validate arguments
|
|
4691
|
+
argument_validation_params = []
|
|
4692
|
+
argument_validation_params.append(["feature_process", feature_process, True, (str, list), True])
|
|
4693
|
+
|
|
4694
|
+
# Validate argument types
|
|
4695
|
+
_Validators._validate_function_arguments(argument_validation_params)
|
|
4696
|
+
|
|
4697
|
+
# 1. Declare Python variables for the mind map
|
|
4698
|
+
data_sources_ = set()
|
|
4699
|
+
feature_processes_ = set()
|
|
4700
|
+
features_ = set()
|
|
4701
|
+
datasets_ = set()
|
|
4702
|
+
data_source_map = {}
|
|
4703
|
+
feature_process_map = {}
|
|
4704
|
+
dataset_feature_map = {}
|
|
4705
|
+
|
|
4706
|
+
sql = """
|
|
4707
|
+
select distinct process_id, oreplace(data_source, '"', '') as data_source, feature_names from "{}".{}
|
|
4708
|
+
where data_domain = '{}'
|
|
4709
|
+
""".format(self.__repo, EFS_DB_COMPONENTS['feature_process'], self.__data_domain)
|
|
4710
|
+
|
|
4711
|
+
# If user provides feature process, filter the SQL query.
|
|
4712
|
+
if feature_process:
|
|
4713
|
+
feature_process = UtilFuncs._as_list(feature_process)
|
|
4714
|
+
feature_process_str = ', '.join(f"'{fp}'" for fp in feature_process)
|
|
4715
|
+
sql += " and process_id in ({})".format(feature_process_str)
|
|
4716
|
+
|
|
4717
|
+
recs = execute_sql(sql)
|
|
4718
|
+
for rec in recs:
|
|
4719
|
+
process_id, data_source, feature_names = rec
|
|
4720
|
+
data_sources_.add(data_source)
|
|
4721
|
+
feature_processes_.add(process_id)
|
|
4722
|
+
feature_names = [f.strip() for f in feature_names.split(',')]
|
|
4723
|
+
features_.update(feature_names)
|
|
4724
|
+
|
|
4725
|
+
# Populate the maps.
|
|
4726
|
+
if data_source not in data_source_map:
|
|
4727
|
+
data_source_map[data_source] = []
|
|
4728
|
+
data_source_map[data_source].append(process_id)
|
|
4729
|
+
|
|
4730
|
+
if process_id not in feature_process_map:
|
|
4731
|
+
feature_process_map[process_id] = []
|
|
4732
|
+
feature_process_map[process_id].extend(feature_names)
|
|
4733
|
+
|
|
4734
|
+
# feature process map can have duplicates.
|
|
4735
|
+
feature_process_map = {k: list(set(v)) for k, v in feature_process_map.items()}
|
|
4736
|
+
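# Illustrative sketch only (hypothetical ids, based on the docstring example):
# after the loop above the maps have this shape, which is what the mind-map
# template consumes:
#
#     data_sources_        = {'sales'}
#     data_source_map      = {'sales': ['7b9f76d6-562c-11f0-bb98-c934b24a960f']}
#     feature_process_map  = {'7b9f76d6-562c-11f0-bb98-c934b24a960f': ['Jan', 'Feb', 'Mar', 'Apr']}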
|
|
4737
|
+
data_sources = [{"id": ds, "label": ds} for ds in data_sources_]
|
|
4738
|
+
feature_processes = [{"id": fp, "label": fp} for fp in feature_processes_]
|
|
4739
|
+
features = [{"id": f, "label": f} for f in features_]
|
|
4740
|
+
|
|
4741
|
+
# Create datasets and dataset_feature_map.
|
|
4742
|
+
ds_sql = """
|
|
4743
|
+
select feature_view, feature_name from
|
|
4744
|
+
"{}".{}
|
|
4745
|
+
where data_domain = '{}'
|
|
4746
|
+
""".format(self.__repo, EFS_DB_COMPONENTS['dataset_features'], self.__data_domain)
|
|
4747
|
+
|
|
4748
|
+
# If user provides a specific feature process, then show only those features in datasets.
|
|
4749
|
+
if feature_process:
|
|
4750
|
+
fp_str = ', '.join(f"'{fp}'" for fp in feature_process)
|
|
4751
|
+
ds_sql += " and feature_version IN ({})".format(fp_str)
|
|
4752
|
+
|
|
4753
|
+
recs = execute_sql(ds_sql)
|
|
4754
|
+
for rec in recs:
|
|
4755
|
+
feature_view, feature_name = rec
|
|
4756
|
+
datasets_.add(feature_view)
|
|
4757
|
+
if feature_view not in dataset_feature_map:
|
|
4758
|
+
dataset_feature_map[feature_view] = []
|
|
4759
|
+
dataset_feature_map[feature_view].append(feature_name)
|
|
4760
|
+
|
|
4761
|
+
datasets = [{"id": ds, "label": ds} for ds in datasets_]
|
|
4762
|
+
|
|
4763
|
+
# 2. Add unique suffix to all ids in the variables
|
|
4764
|
+
from time import time as epoch_seconds
|
|
4765
|
+
suffix = f"_fs_{str(epoch_seconds()).replace('.', '_')}"
|
|
4766
|
+
|
|
4767
|
+
def add_suffix_to_list(lst):
|
|
4768
|
+
return [dict(obj, id=obj["id"] + suffix) for obj in lst]
|
|
4769
|
+
|
|
4770
|
+
def add_suffix_to_dict_keys_and_values(dct):
|
|
4771
|
+
return {k + suffix: [v + suffix for v in vs] for k, vs in dct.items()}
|
|
4772
|
+
|
|
4773
|
+
data_sources_js = add_suffix_to_list(data_sources)
|
|
4774
|
+
feature_processes_js = add_suffix_to_list([obj for obj in feature_processes if not obj.get("invisible")])
|
|
4775
|
+
# Keep invisible objects for completeness in features, but filter for display if needed
|
|
4776
|
+
features_js = add_suffix_to_list(features)
|
|
4777
|
+
datasets_js = add_suffix_to_list(datasets)
|
|
4778
|
+
data_source_map_js = add_suffix_to_dict_keys_and_values(data_source_map)
|
|
4779
|
+
feature_process_map_js = add_suffix_to_dict_keys_and_values(feature_process_map)
|
|
4780
|
+
dataset_feature_map_js = add_suffix_to_dict_keys_and_values(dataset_feature_map)
|
|
4781
|
+
|
|
4782
|
+
# 3. Prepare JS variable strings
|
|
4783
|
+
import json
|
|
4784
|
+
js_data_sources = json.dumps(data_sources_js)
|
|
4785
|
+
js_feature_processes = json.dumps(feature_processes_js)
|
|
4786
|
+
js_features = json.dumps(features_js)
|
|
4787
|
+
js_datasets = json.dumps(datasets_js)
|
|
4788
|
+
js_data_source_map = json.dumps(data_source_map_js)
|
|
4789
|
+
js_feature_process_map = json.dumps(feature_process_map_js)
|
|
4790
|
+
js_dataset_feature_map = json.dumps(dataset_feature_map_js)
|
|
4791
|
+
|
|
4792
|
+
# 4. Get current GMT timestamp for display
|
|
4793
|
+
from datetime import datetime, timezone
|
|
4794
|
+
gmt_now = datetime.now(timezone.utc).strftime('%Y-%m-%d %H:%M:%S GMT')
|
|
4795
|
+
|
|
4796
|
+
# 5. Inject the JS variables, timestamp, and feature store name into the template
|
|
4797
|
+
html_ = _TD_FS_MindMap_Template\
|
|
4798
|
+
.replace("__DATA_SOURCES__", js_data_sources) \
|
|
4799
|
+
.replace("__FEATURE_PROCESSES__", js_feature_processes) \
|
|
4800
|
+
.replace("__FEATURES__", js_features) \
|
|
4801
|
+
.replace("__DATASETS__", js_datasets) \
|
|
4802
|
+
.replace("__DATA_SOURCE_MAP__", js_data_source_map) \
|
|
4803
|
+
.replace("__FEATURE_PROCESS_MAP__", js_feature_process_map) \
|
|
4804
|
+
.replace("__DATASET_FEATURE_MAP__", js_dataset_feature_map) \
|
|
4805
|
+
.replace("__MINDMAP_TIMESTAMP__", gmt_now) \
|
|
4806
|
+
.replace("__REPO__", self.__repo)\
|
|
4807
|
+
.replace("__DATA_DOMAIN__", self.__data_domain)
|
|
4808
|
+
|
|
4809
|
+
# 6. Add the unique suffix to all element IDs in the HTML/JS
|
|
4810
|
+
html_ = html_.replace("_fs_i", suffix)
|
|
4811
|
+
|
|
4812
|
+
from IPython.display import display, HTML
|
|
4813
|
+
display(HTML(html_))
|
|
4814
|
+
|