teradataml-20.0.0.8-py3-none-any.whl

This diff shows the content of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1208)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +2762 -0
  4. teradataml/__init__.py +78 -0
  5. teradataml/_version.py +11 -0
  6. teradataml/analytics/Transformations.py +2996 -0
  7. teradataml/analytics/__init__.py +82 -0
  8. teradataml/analytics/analytic_function_executor.py +2416 -0
  9. teradataml/analytics/analytic_query_generator.py +1050 -0
  10. teradataml/analytics/byom/H2OPredict.py +514 -0
  11. teradataml/analytics/byom/PMMLPredict.py +437 -0
  12. teradataml/analytics/byom/__init__.py +16 -0
  13. teradataml/analytics/json_parser/__init__.py +133 -0
  14. teradataml/analytics/json_parser/analytic_functions_argument.py +1805 -0
  15. teradataml/analytics/json_parser/json_store.py +191 -0
  16. teradataml/analytics/json_parser/metadata.py +1666 -0
  17. teradataml/analytics/json_parser/utils.py +805 -0
  18. teradataml/analytics/meta_class.py +236 -0
  19. teradataml/analytics/sqle/DecisionTreePredict.py +456 -0
  20. teradataml/analytics/sqle/NaiveBayesPredict.py +420 -0
  21. teradataml/analytics/sqle/__init__.py +128 -0
  22. teradataml/analytics/sqle/json/decisiontreepredict_sqle.json +78 -0
  23. teradataml/analytics/sqle/json/naivebayespredict_sqle.json +62 -0
  24. teradataml/analytics/table_operator/__init__.py +11 -0
  25. teradataml/analytics/uaf/__init__.py +82 -0
  26. teradataml/analytics/utils.py +828 -0
  27. teradataml/analytics/valib.py +1617 -0
  28. teradataml/automl/__init__.py +5835 -0
  29. teradataml/automl/autodataprep/__init__.py +493 -0
  30. teradataml/automl/custom_json_utils.py +1625 -0
  31. teradataml/automl/data_preparation.py +1384 -0
  32. teradataml/automl/data_transformation.py +1254 -0
  33. teradataml/automl/feature_engineering.py +2273 -0
  34. teradataml/automl/feature_exploration.py +1873 -0
  35. teradataml/automl/model_evaluation.py +488 -0
  36. teradataml/automl/model_training.py +1407 -0
  37. teradataml/catalog/__init__.py +2 -0
  38. teradataml/catalog/byom.py +1759 -0
  39. teradataml/catalog/function_argument_mapper.py +859 -0
  40. teradataml/catalog/model_cataloging_utils.py +491 -0
  41. teradataml/clients/__init__.py +0 -0
  42. teradataml/clients/auth_client.py +137 -0
  43. teradataml/clients/keycloak_client.py +165 -0
  44. teradataml/clients/pkce_client.py +481 -0
  45. teradataml/common/__init__.py +1 -0
  46. teradataml/common/aed_utils.py +2078 -0
  47. teradataml/common/bulk_exposed_utils.py +113 -0
  48. teradataml/common/constants.py +1669 -0
  49. teradataml/common/deprecations.py +166 -0
  50. teradataml/common/exceptions.py +147 -0
  51. teradataml/common/formula.py +743 -0
  52. teradataml/common/garbagecollector.py +666 -0
  53. teradataml/common/logger.py +1261 -0
  54. teradataml/common/messagecodes.py +518 -0
  55. teradataml/common/messages.py +262 -0
  56. teradataml/common/pylogger.py +67 -0
  57. teradataml/common/sqlbundle.py +764 -0
  58. teradataml/common/td_coltype_code_to_tdtype.py +48 -0
  59. teradataml/common/utils.py +3166 -0
  60. teradataml/common/warnings.py +36 -0
  61. teradataml/common/wrapper_utils.py +625 -0
  62. teradataml/config/__init__.py +0 -0
  63. teradataml/config/dummy_file1.cfg +5 -0
  64. teradataml/config/dummy_file2.cfg +3 -0
  65. teradataml/config/sqlengine_alias_definitions_v1.0 +14 -0
  66. teradataml/config/sqlengine_alias_definitions_v1.1 +20 -0
  67. teradataml/config/sqlengine_alias_definitions_v1.3 +19 -0
  68. teradataml/context/__init__.py +0 -0
  69. teradataml/context/aed_context.py +223 -0
  70. teradataml/context/context.py +1462 -0
  71. teradataml/data/A_loan.csv +19 -0
  72. teradataml/data/BINARY_REALS_LEFT.csv +11 -0
  73. teradataml/data/BINARY_REALS_RIGHT.csv +11 -0
  74. teradataml/data/B_loan.csv +49 -0
  75. teradataml/data/BuoyData2.csv +17 -0
  76. teradataml/data/CONVOLVE2_COMPLEX_LEFT.csv +5 -0
  77. teradataml/data/CONVOLVE2_COMPLEX_RIGHT.csv +5 -0
  78. teradataml/data/Convolve2RealsLeft.csv +5 -0
  79. teradataml/data/Convolve2RealsRight.csv +5 -0
  80. teradataml/data/Convolve2ValidLeft.csv +11 -0
  81. teradataml/data/Convolve2ValidRight.csv +11 -0
  82. teradataml/data/DFFTConv_Real_8_8.csv +65 -0
  83. teradataml/data/Employee.csv +5 -0
  84. teradataml/data/Employee_Address.csv +4 -0
  85. teradataml/data/Employee_roles.csv +5 -0
  86. teradataml/data/JulesBelvezeDummyData.csv +100 -0
  87. teradataml/data/Mall_customer_data.csv +201 -0
  88. teradataml/data/Orders1_12mf.csv +25 -0
  89. teradataml/data/Pi_loan.csv +7 -0
  90. teradataml/data/SMOOTHED_DATA.csv +7 -0
  91. teradataml/data/TestDFFT8.csv +9 -0
  92. teradataml/data/TestRiver.csv +109 -0
  93. teradataml/data/Traindata.csv +28 -0
  94. teradataml/data/__init__.py +0 -0
  95. teradataml/data/acf.csv +17 -0
  96. teradataml/data/adaboost_example.json +34 -0
  97. teradataml/data/adaboostpredict_example.json +24 -0
  98. teradataml/data/additional_table.csv +11 -0
  99. teradataml/data/admissions_test.csv +21 -0
  100. teradataml/data/admissions_train.csv +41 -0
  101. teradataml/data/admissions_train_nulls.csv +41 -0
  102. teradataml/data/advertising.csv +201 -0
  103. teradataml/data/ageandheight.csv +13 -0
  104. teradataml/data/ageandpressure.csv +31 -0
  105. teradataml/data/amazon_reviews_25.csv +26 -0
  106. teradataml/data/antiselect_example.json +36 -0
  107. teradataml/data/antiselect_input.csv +8 -0
  108. teradataml/data/antiselect_input_mixed_case.csv +8 -0
  109. teradataml/data/applicant_external.csv +7 -0
  110. teradataml/data/applicant_reference.csv +7 -0
  111. teradataml/data/apriori_example.json +22 -0
  112. teradataml/data/arima_example.json +9 -0
  113. teradataml/data/assortedtext_input.csv +8 -0
  114. teradataml/data/attribution_example.json +34 -0
  115. teradataml/data/attribution_sample_table.csv +27 -0
  116. teradataml/data/attribution_sample_table1.csv +6 -0
  117. teradataml/data/attribution_sample_table2.csv +11 -0
  118. teradataml/data/bank_churn.csv +10001 -0
  119. teradataml/data/bank_marketing.csv +11163 -0
  120. teradataml/data/bank_web_clicks1.csv +43 -0
  121. teradataml/data/bank_web_clicks2.csv +91 -0
  122. teradataml/data/bank_web_url.csv +85 -0
  123. teradataml/data/barrier.csv +2 -0
  124. teradataml/data/barrier_new.csv +3 -0
  125. teradataml/data/betweenness_example.json +14 -0
  126. teradataml/data/bike_sharing.csv +732 -0
  127. teradataml/data/bin_breaks.csv +8 -0
  128. teradataml/data/bin_fit_ip.csv +4 -0
  129. teradataml/data/binary_complex_left.csv +11 -0
  130. teradataml/data/binary_complex_right.csv +11 -0
  131. teradataml/data/binary_matrix_complex_left.csv +21 -0
  132. teradataml/data/binary_matrix_complex_right.csv +21 -0
  133. teradataml/data/binary_matrix_real_left.csv +21 -0
  134. teradataml/data/binary_matrix_real_right.csv +21 -0
  135. teradataml/data/blood2ageandweight.csv +26 -0
  136. teradataml/data/bmi.csv +501 -0
  137. teradataml/data/boston.csv +507 -0
  138. teradataml/data/boston2cols.csv +721 -0
  139. teradataml/data/breast_cancer.csv +570 -0
  140. teradataml/data/buoydata_mix.csv +11 -0
  141. teradataml/data/burst_data.csv +5 -0
  142. teradataml/data/burst_example.json +21 -0
  143. teradataml/data/byom_example.json +34 -0
  144. teradataml/data/bytes_table.csv +4 -0
  145. teradataml/data/cal_housing_ex_raw.csv +70 -0
  146. teradataml/data/callers.csv +7 -0
  147. teradataml/data/calls.csv +10 -0
  148. teradataml/data/cars_hist.csv +33 -0
  149. teradataml/data/cat_table.csv +25 -0
  150. teradataml/data/ccm_example.json +32 -0
  151. teradataml/data/ccm_input.csv +91 -0
  152. teradataml/data/ccm_input2.csv +13 -0
  153. teradataml/data/ccmexample.csv +101 -0
  154. teradataml/data/ccmprepare_example.json +9 -0
  155. teradataml/data/ccmprepare_input.csv +91 -0
  156. teradataml/data/cfilter_example.json +12 -0
  157. teradataml/data/changepointdetection_example.json +18 -0
  158. teradataml/data/changepointdetectionrt_example.json +8 -0
  159. teradataml/data/chi_sq.csv +3 -0
  160. teradataml/data/churn_data.csv +14 -0
  161. teradataml/data/churn_emission.csv +35 -0
  162. teradataml/data/churn_initial.csv +3 -0
  163. teradataml/data/churn_state_transition.csv +5 -0
  164. teradataml/data/citedges_2.csv +745 -0
  165. teradataml/data/citvertices_2.csv +1210 -0
  166. teradataml/data/clicks2.csv +16 -0
  167. teradataml/data/clickstream.csv +13 -0
  168. teradataml/data/clickstream1.csv +11 -0
  169. teradataml/data/closeness_example.json +16 -0
  170. teradataml/data/complaints.csv +21 -0
  171. teradataml/data/complaints_mini.csv +3 -0
  172. teradataml/data/complaints_test_tokenized.csv +353 -0
  173. teradataml/data/complaints_testtoken.csv +224 -0
  174. teradataml/data/complaints_tokens_model.csv +348 -0
  175. teradataml/data/complaints_tokens_test.csv +353 -0
  176. teradataml/data/complaints_traintoken.csv +472 -0
  177. teradataml/data/computers_category.csv +1001 -0
  178. teradataml/data/computers_test1.csv +1252 -0
  179. teradataml/data/computers_train1.csv +5009 -0
  180. teradataml/data/computers_train1_clustered.csv +5009 -0
  181. teradataml/data/confusionmatrix_example.json +9 -0
  182. teradataml/data/conversion_event_table.csv +3 -0
  183. teradataml/data/corr_input.csv +17 -0
  184. teradataml/data/correlation_example.json +11 -0
  185. teradataml/data/covid_confirm_sd.csv +83 -0
  186. teradataml/data/coxhazardratio_example.json +39 -0
  187. teradataml/data/coxph_example.json +15 -0
  188. teradataml/data/coxsurvival_example.json +28 -0
  189. teradataml/data/cpt.csv +41 -0
  190. teradataml/data/credit_ex_merged.csv +45 -0
  191. teradataml/data/creditcard_data.csv +1001 -0
  192. teradataml/data/customer_loyalty.csv +301 -0
  193. teradataml/data/customer_loyalty_newseq.csv +31 -0
  194. teradataml/data/customer_segmentation_test.csv +2628 -0
  195. teradataml/data/customer_segmentation_train.csv +8069 -0
  196. teradataml/data/dataframe_example.json +173 -0
  197. teradataml/data/decisionforest_example.json +37 -0
  198. teradataml/data/decisionforestpredict_example.json +38 -0
  199. teradataml/data/decisiontree_example.json +21 -0
  200. teradataml/data/decisiontreepredict_example.json +45 -0
  201. teradataml/data/dfft2_size4_real.csv +17 -0
  202. teradataml/data/dfft2_test_matrix16.csv +17 -0
  203. teradataml/data/dfft2conv_real_4_4.csv +65 -0
  204. teradataml/data/diabetes.csv +443 -0
  205. teradataml/data/diabetes_test.csv +89 -0
  206. teradataml/data/dict_table.csv +5 -0
  207. teradataml/data/docperterm_table.csv +4 -0
  208. teradataml/data/docs/__init__.py +1 -0
  209. teradataml/data/docs/byom/__init__.py +0 -0
  210. teradataml/data/docs/byom/docs/DataRobotPredict.py +180 -0
  211. teradataml/data/docs/byom/docs/DataikuPredict.py +217 -0
  212. teradataml/data/docs/byom/docs/H2OPredict.py +325 -0
  213. teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
  214. teradataml/data/docs/byom/docs/ONNXPredict.py +283 -0
  215. teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
  216. teradataml/data/docs/byom/docs/PMMLPredict.py +278 -0
  217. teradataml/data/docs/byom/docs/__init__.py +0 -0
  218. teradataml/data/docs/sqle/__init__.py +0 -0
  219. teradataml/data/docs/sqle/docs_17_10/Antiselect.py +83 -0
  220. teradataml/data/docs/sqle/docs_17_10/Attribution.py +200 -0
  221. teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +172 -0
  222. teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +131 -0
  223. teradataml/data/docs/sqle/docs_17_10/CategoricalSummary.py +86 -0
  224. teradataml/data/docs/sqle/docs_17_10/ChiSq.py +90 -0
  225. teradataml/data/docs/sqle/docs_17_10/ColumnSummary.py +86 -0
  226. teradataml/data/docs/sqle/docs_17_10/ConvertTo.py +96 -0
  227. teradataml/data/docs/sqle/docs_17_10/DecisionForestPredict.py +139 -0
  228. teradataml/data/docs/sqle/docs_17_10/DecisionTreePredict.py +152 -0
  229. teradataml/data/docs/sqle/docs_17_10/FTest.py +161 -0
  230. teradataml/data/docs/sqle/docs_17_10/FillRowId.py +83 -0
  231. teradataml/data/docs/sqle/docs_17_10/Fit.py +88 -0
  232. teradataml/data/docs/sqle/docs_17_10/GLMPredict.py +144 -0
  233. teradataml/data/docs/sqle/docs_17_10/GetRowsWithMissingValues.py +85 -0
  234. teradataml/data/docs/sqle/docs_17_10/GetRowsWithoutMissingValues.py +82 -0
  235. teradataml/data/docs/sqle/docs_17_10/Histogram.py +165 -0
  236. teradataml/data/docs/sqle/docs_17_10/MovingAverage.py +134 -0
  237. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +209 -0
  238. teradataml/data/docs/sqle/docs_17_10/NPath.py +266 -0
  239. teradataml/data/docs/sqle/docs_17_10/NaiveBayesPredict.py +116 -0
  240. teradataml/data/docs/sqle/docs_17_10/NaiveBayesTextClassifierPredict.py +176 -0
  241. teradataml/data/docs/sqle/docs_17_10/NumApply.py +147 -0
  242. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +135 -0
  243. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +109 -0
  244. teradataml/data/docs/sqle/docs_17_10/OutlierFilterFit.py +166 -0
  245. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +105 -0
  246. teradataml/data/docs/sqle/docs_17_10/Pack.py +128 -0
  247. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesFit.py +112 -0
  248. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +102 -0
  249. teradataml/data/docs/sqle/docs_17_10/QQNorm.py +105 -0
  250. teradataml/data/docs/sqle/docs_17_10/RoundColumns.py +110 -0
  251. teradataml/data/docs/sqle/docs_17_10/RowNormalizeFit.py +118 -0
  252. teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +99 -0
  253. teradataml/data/docs/sqle/docs_17_10/SVMSparsePredict.py +153 -0
  254. teradataml/data/docs/sqle/docs_17_10/ScaleFit.py +197 -0
  255. teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +99 -0
  256. teradataml/data/docs/sqle/docs_17_10/Sessionize.py +114 -0
  257. teradataml/data/docs/sqle/docs_17_10/SimpleImputeFit.py +116 -0
  258. teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +98 -0
  259. teradataml/data/docs/sqle/docs_17_10/StrApply.py +187 -0
  260. teradataml/data/docs/sqle/docs_17_10/StringSimilarity.py +146 -0
  261. teradataml/data/docs/sqle/docs_17_10/Transform.py +105 -0
  262. teradataml/data/docs/sqle/docs_17_10/UnivariateStatistics.py +142 -0
  263. teradataml/data/docs/sqle/docs_17_10/Unpack.py +214 -0
  264. teradataml/data/docs/sqle/docs_17_10/WhichMax.py +83 -0
  265. teradataml/data/docs/sqle/docs_17_10/WhichMin.py +83 -0
  266. teradataml/data/docs/sqle/docs_17_10/ZTest.py +155 -0
  267. teradataml/data/docs/sqle/docs_17_10/__init__.py +0 -0
  268. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +186 -0
  269. teradataml/data/docs/sqle/docs_17_20/Antiselect.py +83 -0
  270. teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
  271. teradataml/data/docs/sqle/docs_17_20/Attribution.py +201 -0
  272. teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +172 -0
  273. teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +139 -0
  274. teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
  275. teradataml/data/docs/sqle/docs_17_20/CategoricalSummary.py +86 -0
  276. teradataml/data/docs/sqle/docs_17_20/ChiSq.py +90 -0
  277. teradataml/data/docs/sqle/docs_17_20/ClassificationEvaluator.py +166 -0
  278. teradataml/data/docs/sqle/docs_17_20/ColumnSummary.py +86 -0
  279. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +246 -0
  280. teradataml/data/docs/sqle/docs_17_20/ConvertTo.py +113 -0
  281. teradataml/data/docs/sqle/docs_17_20/DecisionForest.py +280 -0
  282. teradataml/data/docs/sqle/docs_17_20/DecisionForestPredict.py +144 -0
  283. teradataml/data/docs/sqle/docs_17_20/DecisionTreePredict.py +136 -0
  284. teradataml/data/docs/sqle/docs_17_20/FTest.py +240 -0
  285. teradataml/data/docs/sqle/docs_17_20/FillRowId.py +83 -0
  286. teradataml/data/docs/sqle/docs_17_20/Fit.py +88 -0
  287. teradataml/data/docs/sqle/docs_17_20/GLM.py +541 -0
  288. teradataml/data/docs/sqle/docs_17_20/GLMPerSegment.py +415 -0
  289. teradataml/data/docs/sqle/docs_17_20/GLMPredict.py +144 -0
  290. teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +233 -0
  291. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +125 -0
  292. teradataml/data/docs/sqle/docs_17_20/GetRowsWithMissingValues.py +109 -0
  293. teradataml/data/docs/sqle/docs_17_20/GetRowsWithoutMissingValues.py +106 -0
  294. teradataml/data/docs/sqle/docs_17_20/Histogram.py +224 -0
  295. teradataml/data/docs/sqle/docs_17_20/KMeans.py +251 -0
  296. teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +144 -0
  297. teradataml/data/docs/sqle/docs_17_20/KNN.py +215 -0
  298. teradataml/data/docs/sqle/docs_17_20/MovingAverage.py +134 -0
  299. teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
  300. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +209 -0
  301. teradataml/data/docs/sqle/docs_17_20/NPath.py +266 -0
  302. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
  303. teradataml/data/docs/sqle/docs_17_20/NaiveBayesPredict.py +116 -0
  304. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +177 -0
  305. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +127 -0
  306. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +119 -0
  307. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +112 -0
  308. teradataml/data/docs/sqle/docs_17_20/NumApply.py +147 -0
  309. teradataml/data/docs/sqle/docs_17_20/OneClassSVM.py +307 -0
  310. teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +185 -0
  311. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +231 -0
  312. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +121 -0
  313. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingFit.py +220 -0
  314. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingTransform.py +127 -0
  315. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +191 -0
  316. teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +117 -0
  317. teradataml/data/docs/sqle/docs_17_20/Pack.py +128 -0
  318. teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
  319. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesFit.py +112 -0
  320. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +112 -0
  321. teradataml/data/docs/sqle/docs_17_20/QQNorm.py +105 -0
  322. teradataml/data/docs/sqle/docs_17_20/ROC.py +164 -0
  323. teradataml/data/docs/sqle/docs_17_20/RandomProjectionFit.py +155 -0
  324. teradataml/data/docs/sqle/docs_17_20/RandomProjectionMinComponents.py +106 -0
  325. teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +120 -0
  326. teradataml/data/docs/sqle/docs_17_20/RegressionEvaluator.py +211 -0
  327. teradataml/data/docs/sqle/docs_17_20/RoundColumns.py +109 -0
  328. teradataml/data/docs/sqle/docs_17_20/RowNormalizeFit.py +118 -0
  329. teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +111 -0
  330. teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
  331. teradataml/data/docs/sqle/docs_17_20/SVM.py +414 -0
  332. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +213 -0
  333. teradataml/data/docs/sqle/docs_17_20/SVMSparsePredict.py +153 -0
  334. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +315 -0
  335. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +202 -0
  336. teradataml/data/docs/sqle/docs_17_20/SentimentExtractor.py +206 -0
  337. teradataml/data/docs/sqle/docs_17_20/Sessionize.py +114 -0
  338. teradataml/data/docs/sqle/docs_17_20/Shap.py +225 -0
  339. teradataml/data/docs/sqle/docs_17_20/Silhouette.py +153 -0
  340. teradataml/data/docs/sqle/docs_17_20/SimpleImputeFit.py +116 -0
  341. teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +109 -0
  342. teradataml/data/docs/sqle/docs_17_20/StrApply.py +187 -0
  343. teradataml/data/docs/sqle/docs_17_20/StringSimilarity.py +146 -0
  344. teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +207 -0
  345. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +333 -0
  346. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
  347. teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
  348. teradataml/data/docs/sqle/docs_17_20/TargetEncodingFit.py +267 -0
  349. teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +141 -0
  350. teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
  351. teradataml/data/docs/sqle/docs_17_20/TextParser.py +224 -0
  352. teradataml/data/docs/sqle/docs_17_20/TrainTestSplit.py +160 -0
  353. teradataml/data/docs/sqle/docs_17_20/Transform.py +123 -0
  354. teradataml/data/docs/sqle/docs_17_20/UnivariateStatistics.py +142 -0
  355. teradataml/data/docs/sqle/docs_17_20/Unpack.py +214 -0
  356. teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
  357. teradataml/data/docs/sqle/docs_17_20/VectorDistance.py +169 -0
  358. teradataml/data/docs/sqle/docs_17_20/WhichMax.py +83 -0
  359. teradataml/data/docs/sqle/docs_17_20/WhichMin.py +83 -0
  360. teradataml/data/docs/sqle/docs_17_20/WordEmbeddings.py +237 -0
  361. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +362 -0
  362. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +281 -0
  363. teradataml/data/docs/sqle/docs_17_20/ZTest.py +220 -0
  364. teradataml/data/docs/sqle/docs_17_20/__init__.py +0 -0
  365. teradataml/data/docs/tableoperator/__init__.py +0 -0
  366. teradataml/data/docs/tableoperator/docs_17_00/ReadNOS.py +430 -0
  367. teradataml/data/docs/tableoperator/docs_17_00/__init__.py +0 -0
  368. teradataml/data/docs/tableoperator/docs_17_05/ReadNOS.py +430 -0
  369. teradataml/data/docs/tableoperator/docs_17_05/WriteNOS.py +348 -0
  370. teradataml/data/docs/tableoperator/docs_17_05/__init__.py +0 -0
  371. teradataml/data/docs/tableoperator/docs_17_10/ReadNOS.py +429 -0
  372. teradataml/data/docs/tableoperator/docs_17_10/WriteNOS.py +348 -0
  373. teradataml/data/docs/tableoperator/docs_17_10/__init__.py +0 -0
  374. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  375. teradataml/data/docs/tableoperator/docs_17_20/ReadNOS.py +440 -0
  376. teradataml/data/docs/tableoperator/docs_17_20/WriteNOS.py +387 -0
  377. teradataml/data/docs/tableoperator/docs_17_20/__init__.py +0 -0
  378. teradataml/data/docs/uaf/__init__.py +0 -0
  379. teradataml/data/docs/uaf/docs_17_20/ACF.py +186 -0
  380. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +370 -0
  381. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +172 -0
  382. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +161 -0
  383. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
  384. teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
  385. teradataml/data/docs/uaf/docs_17_20/BinaryMatrixOp.py +248 -0
  386. teradataml/data/docs/uaf/docs_17_20/BinarySeriesOp.py +252 -0
  387. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +178 -0
  388. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +175 -0
  389. teradataml/data/docs/uaf/docs_17_20/Convolve.py +230 -0
  390. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +218 -0
  391. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  392. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +185 -0
  393. teradataml/data/docs/uaf/docs_17_20/DFFT.py +204 -0
  394. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +216 -0
  395. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +216 -0
  396. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +192 -0
  397. teradataml/data/docs/uaf/docs_17_20/DIFF.py +175 -0
  398. teradataml/data/docs/uaf/docs_17_20/DTW.py +180 -0
  399. teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
  400. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +217 -0
  401. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +142 -0
  402. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +184 -0
  403. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +185 -0
  404. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
  405. teradataml/data/docs/uaf/docs_17_20/FitMetrics.py +172 -0
  406. teradataml/data/docs/uaf/docs_17_20/GenseriesFormula.py +206 -0
  407. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +143 -0
  408. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +198 -0
  409. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +260 -0
  410. teradataml/data/docs/uaf/docs_17_20/IDFFT.py +165 -0
  411. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +191 -0
  412. teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
  413. teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
  414. teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
  415. teradataml/data/docs/uaf/docs_17_20/InputValidator.py +121 -0
  416. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +156 -0
  417. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +215 -0
  418. teradataml/data/docs/uaf/docs_17_20/MAMean.py +174 -0
  419. teradataml/data/docs/uaf/docs_17_20/MInfo.py +134 -0
  420. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
  421. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +145 -0
  422. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +191 -0
  423. teradataml/data/docs/uaf/docs_17_20/PACF.py +157 -0
  424. teradataml/data/docs/uaf/docs_17_20/Portman.py +217 -0
  425. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +203 -0
  426. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +155 -0
  427. teradataml/data/docs/uaf/docs_17_20/Resample.py +237 -0
  428. teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
  429. teradataml/data/docs/uaf/docs_17_20/SInfo.py +123 -0
  430. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +173 -0
  431. teradataml/data/docs/uaf/docs_17_20/SelectionCriteria.py +174 -0
  432. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +171 -0
  433. teradataml/data/docs/uaf/docs_17_20/SignifResidmean.py +164 -0
  434. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +180 -0
  435. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +208 -0
  436. teradataml/data/docs/uaf/docs_17_20/TrackingOp.py +151 -0
  437. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +171 -0
  438. teradataml/data/docs/uaf/docs_17_20/Unnormalize.py +202 -0
  439. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +171 -0
  440. teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
  441. teradataml/data/docs/uaf/docs_17_20/__init__.py +0 -0
  442. teradataml/data/dtw_example.json +18 -0
  443. teradataml/data/dtw_t1.csv +11 -0
  444. teradataml/data/dtw_t2.csv +4 -0
  445. teradataml/data/dwt2d_dataTable.csv +65 -0
  446. teradataml/data/dwt2d_example.json +16 -0
  447. teradataml/data/dwt_dataTable.csv +8 -0
  448. teradataml/data/dwt_example.json +15 -0
  449. teradataml/data/dwt_filterTable.csv +3 -0
  450. teradataml/data/dwt_filter_dim.csv +5 -0
  451. teradataml/data/emission.csv +9 -0
  452. teradataml/data/emp_table_by_dept.csv +19 -0
  453. teradataml/data/employee_info.csv +4 -0
  454. teradataml/data/employee_table.csv +6 -0
  455. teradataml/data/excluding_event_table.csv +2 -0
  456. teradataml/data/finance_data.csv +6 -0
  457. teradataml/data/finance_data2.csv +61 -0
  458. teradataml/data/finance_data3.csv +93 -0
  459. teradataml/data/finance_data4.csv +13 -0
  460. teradataml/data/fish.csv +160 -0
  461. teradataml/data/fm_blood2ageandweight.csv +26 -0
  462. teradataml/data/fmeasure_example.json +12 -0
  463. teradataml/data/followers_leaders.csv +10 -0
  464. teradataml/data/fpgrowth_example.json +12 -0
  465. teradataml/data/frequentpaths_example.json +29 -0
  466. teradataml/data/friends.csv +9 -0
  467. teradataml/data/fs_input.csv +33 -0
  468. teradataml/data/fs_input1.csv +33 -0
  469. teradataml/data/genData.csv +513 -0
  470. teradataml/data/geodataframe_example.json +40 -0
  471. teradataml/data/glass_types.csv +215 -0
  472. teradataml/data/glm_admissions_model.csv +12 -0
  473. teradataml/data/glm_example.json +56 -0
  474. teradataml/data/glml1l2_example.json +28 -0
  475. teradataml/data/glml1l2predict_example.json +54 -0
  476. teradataml/data/glmpredict_example.json +54 -0
  477. teradataml/data/gq_t1.csv +21 -0
  478. teradataml/data/grocery_transaction.csv +19 -0
  479. teradataml/data/hconvolve_complex_right.csv +5 -0
  480. teradataml/data/hconvolve_complex_rightmulti.csv +5 -0
  481. teradataml/data/histogram_example.json +12 -0
  482. teradataml/data/hmmdecoder_example.json +79 -0
  483. teradataml/data/hmmevaluator_example.json +25 -0
  484. teradataml/data/hmmsupervised_example.json +10 -0
  485. teradataml/data/hmmunsupervised_example.json +8 -0
  486. teradataml/data/hnsw_alter_data.csv +5 -0
  487. teradataml/data/hnsw_data.csv +10 -0
  488. teradataml/data/house_values.csv +12 -0
  489. teradataml/data/house_values2.csv +13 -0
  490. teradataml/data/housing_cat.csv +7 -0
  491. teradataml/data/housing_data.csv +9 -0
  492. teradataml/data/housing_test.csv +47 -0
  493. teradataml/data/housing_test_binary.csv +47 -0
  494. teradataml/data/housing_train.csv +493 -0
  495. teradataml/data/housing_train_attribute.csv +5 -0
  496. teradataml/data/housing_train_binary.csv +437 -0
  497. teradataml/data/housing_train_parameter.csv +2 -0
  498. teradataml/data/housing_train_response.csv +493 -0
  499. teradataml/data/housing_train_segment.csv +201 -0
  500. teradataml/data/ibm_stock.csv +370 -0
  501. teradataml/data/ibm_stock1.csv +370 -0
  502. teradataml/data/identitymatch_example.json +22 -0
  503. teradataml/data/idf_table.csv +4 -0
  504. teradataml/data/idwt2d_dataTable.csv +5 -0
  505. teradataml/data/idwt_dataTable.csv +8 -0
  506. teradataml/data/idwt_filterTable.csv +3 -0
  507. teradataml/data/impressions.csv +101 -0
  508. teradataml/data/inflation.csv +21 -0
  509. teradataml/data/initial.csv +3 -0
  510. teradataml/data/insect2Cols.csv +61 -0
  511. teradataml/data/insect_sprays.csv +13 -0
  512. teradataml/data/insurance.csv +1339 -0
  513. teradataml/data/interpolator_example.json +13 -0
  514. teradataml/data/interval_data.csv +5 -0
  515. teradataml/data/iris_altinput.csv +481 -0
  516. teradataml/data/iris_attribute_output.csv +8 -0
  517. teradataml/data/iris_attribute_test.csv +121 -0
  518. teradataml/data/iris_attribute_train.csv +481 -0
  519. teradataml/data/iris_category_expect_predict.csv +31 -0
  520. teradataml/data/iris_data.csv +151 -0
  521. teradataml/data/iris_input.csv +151 -0
  522. teradataml/data/iris_response_train.csv +121 -0
  523. teradataml/data/iris_test.csv +31 -0
  524. teradataml/data/iris_train.csv +121 -0
  525. teradataml/data/join_table1.csv +4 -0
  526. teradataml/data/join_table2.csv +4 -0
  527. teradataml/data/jsons/anly_function_name.json +7 -0
  528. teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
  529. teradataml/data/jsons/byom/dataikupredict.json +148 -0
  530. teradataml/data/jsons/byom/datarobotpredict.json +147 -0
  531. teradataml/data/jsons/byom/h2opredict.json +195 -0
  532. teradataml/data/jsons/byom/onnxembeddings.json +267 -0
  533. teradataml/data/jsons/byom/onnxpredict.json +187 -0
  534. teradataml/data/jsons/byom/pmmlpredict.json +147 -0
  535. teradataml/data/jsons/paired_functions.json +450 -0
  536. teradataml/data/jsons/sqle/16.20/Antiselect.json +56 -0
  537. teradataml/data/jsons/sqle/16.20/Attribution.json +249 -0
  538. teradataml/data/jsons/sqle/16.20/DecisionForestPredict.json +156 -0
  539. teradataml/data/jsons/sqle/16.20/DecisionTreePredict.json +170 -0
  540. teradataml/data/jsons/sqle/16.20/GLMPredict.json +122 -0
  541. teradataml/data/jsons/sqle/16.20/MovingAverage.json +367 -0
  542. teradataml/data/jsons/sqle/16.20/NGramSplitter.json +239 -0
  543. teradataml/data/jsons/sqle/16.20/NaiveBayesPredict.json +136 -0
  544. teradataml/data/jsons/sqle/16.20/NaiveBayesTextClassifierPredict.json +235 -0
  545. teradataml/data/jsons/sqle/16.20/Pack.json +98 -0
  546. teradataml/data/jsons/sqle/16.20/SVMSparsePredict.json +162 -0
  547. teradataml/data/jsons/sqle/16.20/Sessionize.json +105 -0
  548. teradataml/data/jsons/sqle/16.20/StringSimilarity.json +86 -0
  549. teradataml/data/jsons/sqle/16.20/Unpack.json +166 -0
  550. teradataml/data/jsons/sqle/16.20/nPath.json +269 -0
  551. teradataml/data/jsons/sqle/17.00/Antiselect.json +56 -0
  552. teradataml/data/jsons/sqle/17.00/Attribution.json +249 -0
  553. teradataml/data/jsons/sqle/17.00/DecisionForestPredict.json +156 -0
  554. teradataml/data/jsons/sqle/17.00/DecisionTreePredict.json +170 -0
  555. teradataml/data/jsons/sqle/17.00/GLMPredict.json +122 -0
  556. teradataml/data/jsons/sqle/17.00/MovingAverage.json +367 -0
  557. teradataml/data/jsons/sqle/17.00/NGramSplitter.json +239 -0
  558. teradataml/data/jsons/sqle/17.00/NaiveBayesPredict.json +136 -0
  559. teradataml/data/jsons/sqle/17.00/NaiveBayesTextClassifierPredict.json +235 -0
  560. teradataml/data/jsons/sqle/17.00/Pack.json +98 -0
  561. teradataml/data/jsons/sqle/17.00/SVMSparsePredict.json +162 -0
  562. teradataml/data/jsons/sqle/17.00/Sessionize.json +105 -0
  563. teradataml/data/jsons/sqle/17.00/StringSimilarity.json +86 -0
  564. teradataml/data/jsons/sqle/17.00/Unpack.json +166 -0
  565. teradataml/data/jsons/sqle/17.00/nPath.json +269 -0
  566. teradataml/data/jsons/sqle/17.05/Antiselect.json +56 -0
  567. teradataml/data/jsons/sqle/17.05/Attribution.json +249 -0
  568. teradataml/data/jsons/sqle/17.05/DecisionForestPredict.json +156 -0
  569. teradataml/data/jsons/sqle/17.05/DecisionTreePredict.json +170 -0
  570. teradataml/data/jsons/sqle/17.05/GLMPredict.json +122 -0
  571. teradataml/data/jsons/sqle/17.05/MovingAverage.json +367 -0
  572. teradataml/data/jsons/sqle/17.05/NGramSplitter.json +239 -0
  573. teradataml/data/jsons/sqle/17.05/NaiveBayesPredict.json +136 -0
  574. teradataml/data/jsons/sqle/17.05/NaiveBayesTextClassifierPredict.json +235 -0
  575. teradataml/data/jsons/sqle/17.05/Pack.json +98 -0
  576. teradataml/data/jsons/sqle/17.05/SVMSparsePredict.json +162 -0
  577. teradataml/data/jsons/sqle/17.05/Sessionize.json +105 -0
  578. teradataml/data/jsons/sqle/17.05/StringSimilarity.json +86 -0
  579. teradataml/data/jsons/sqle/17.05/Unpack.json +166 -0
  580. teradataml/data/jsons/sqle/17.05/nPath.json +269 -0
  581. teradataml/data/jsons/sqle/17.10/Antiselect.json +56 -0
  582. teradataml/data/jsons/sqle/17.10/Attribution.json +249 -0
  583. teradataml/data/jsons/sqle/17.10/DecisionForestPredict.json +185 -0
  584. teradataml/data/jsons/sqle/17.10/DecisionTreePredict.json +172 -0
  585. teradataml/data/jsons/sqle/17.10/GLMPredict.json +151 -0
  586. teradataml/data/jsons/sqle/17.10/MovingAverage.json +368 -0
  587. teradataml/data/jsons/sqle/17.10/NGramSplitter.json +239 -0
  588. teradataml/data/jsons/sqle/17.10/NaiveBayesPredict.json +149 -0
  589. teradataml/data/jsons/sqle/17.10/NaiveBayesTextClassifierPredict.json +288 -0
  590. teradataml/data/jsons/sqle/17.10/Pack.json +133 -0
  591. teradataml/data/jsons/sqle/17.10/SVMSparsePredict.json +193 -0
  592. teradataml/data/jsons/sqle/17.10/Sessionize.json +105 -0
  593. teradataml/data/jsons/sqle/17.10/StringSimilarity.json +86 -0
  594. teradataml/data/jsons/sqle/17.10/TD_BinCodeFit.json +239 -0
  595. teradataml/data/jsons/sqle/17.10/TD_BinCodeTransform.json +70 -0
  596. teradataml/data/jsons/sqle/17.10/TD_CategoricalSummary.json +54 -0
  597. teradataml/data/jsons/sqle/17.10/TD_Chisq.json +68 -0
  598. teradataml/data/jsons/sqle/17.10/TD_ColumnSummary.json +54 -0
  599. teradataml/data/jsons/sqle/17.10/TD_ConvertTo.json +69 -0
  600. teradataml/data/jsons/sqle/17.10/TD_FTest.json +187 -0
  601. teradataml/data/jsons/sqle/17.10/TD_FillRowID.json +52 -0
  602. teradataml/data/jsons/sqle/17.10/TD_FunctionFit.json +46 -0
  603. teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +72 -0
  604. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithMissingValues.json +53 -0
  605. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithoutMissingValues.json +53 -0
  606. teradataml/data/jsons/sqle/17.10/TD_Histogram.json +133 -0
  607. teradataml/data/jsons/sqle/17.10/TD_NumApply.json +147 -0
  608. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingFit.json +183 -0
  609. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +66 -0
  610. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterFit.json +197 -0
  611. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +48 -0
  612. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesFit.json +114 -0
  613. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +72 -0
  614. teradataml/data/jsons/sqle/17.10/TD_QQNorm.json +112 -0
  615. teradataml/data/jsons/sqle/17.10/TD_RoundColumns.json +93 -0
  616. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeFit.json +128 -0
  617. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +71 -0
  618. teradataml/data/jsons/sqle/17.10/TD_ScaleFit.json +157 -0
  619. teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +71 -0
  620. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeFit.json +148 -0
  621. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +48 -0
  622. teradataml/data/jsons/sqle/17.10/TD_StrApply.json +240 -0
  623. teradataml/data/jsons/sqle/17.10/TD_UnivariateStatistics.json +119 -0
  624. teradataml/data/jsons/sqle/17.10/TD_WhichMax.json +53 -0
  625. teradataml/data/jsons/sqle/17.10/TD_WhichMin.json +53 -0
  626. teradataml/data/jsons/sqle/17.10/TD_ZTest.json +171 -0
  627. teradataml/data/jsons/sqle/17.10/Unpack.json +188 -0
  628. teradataml/data/jsons/sqle/17.10/nPath.json +269 -0
  629. teradataml/data/jsons/sqle/17.20/Antiselect.json +56 -0
  630. teradataml/data/jsons/sqle/17.20/Attribution.json +249 -0
  631. teradataml/data/jsons/sqle/17.20/DecisionForestPredict.json +185 -0
  632. teradataml/data/jsons/sqle/17.20/DecisionTreePredict.json +172 -0
  633. teradataml/data/jsons/sqle/17.20/GLMPredict.json +151 -0
  634. teradataml/data/jsons/sqle/17.20/MovingAverage.json +367 -0
  635. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +239 -0
  636. teradataml/data/jsons/sqle/17.20/NaiveBayesPredict.json +149 -0
  637. teradataml/data/jsons/sqle/17.20/NaiveBayesTextClassifierPredict.json +287 -0
  638. teradataml/data/jsons/sqle/17.20/Pack.json +133 -0
  639. teradataml/data/jsons/sqle/17.20/SVMSparsePredict.json +192 -0
  640. teradataml/data/jsons/sqle/17.20/Sessionize.json +105 -0
  641. teradataml/data/jsons/sqle/17.20/StringSimilarity.json +86 -0
  642. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +149 -0
  643. teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
  644. teradataml/data/jsons/sqle/17.20/TD_BinCodeFit.json +239 -0
  645. teradataml/data/jsons/sqle/17.20/TD_BinCodeTransform.json +71 -0
  646. teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
  647. teradataml/data/jsons/sqle/17.20/TD_CategoricalSummary.json +53 -0
  648. teradataml/data/jsons/sqle/17.20/TD_Chisq.json +68 -0
  649. teradataml/data/jsons/sqle/17.20/TD_ClassificationEvaluator.json +146 -0
  650. teradataml/data/jsons/sqle/17.20/TD_ColumnSummary.json +53 -0
  651. teradataml/data/jsons/sqle/17.20/TD_ColumnTransformer.json +218 -0
  652. teradataml/data/jsons/sqle/17.20/TD_ConvertTo.json +92 -0
  653. teradataml/data/jsons/sqle/17.20/TD_DecisionForest.json +260 -0
  654. teradataml/data/jsons/sqle/17.20/TD_DecisionForestPredict.json +139 -0
  655. teradataml/data/jsons/sqle/17.20/TD_FTest.json +269 -0
  656. teradataml/data/jsons/sqle/17.20/TD_FillRowID.json +52 -0
  657. teradataml/data/jsons/sqle/17.20/TD_FunctionFit.json +46 -0
  658. teradataml/data/jsons/sqle/17.20/TD_FunctionTransform.json +72 -0
  659. teradataml/data/jsons/sqle/17.20/TD_GLM.json +507 -0
  660. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +168 -0
  661. teradataml/data/jsons/sqle/17.20/TD_GLMPerSegment.json +411 -0
  662. teradataml/data/jsons/sqle/17.20/TD_GLMPredictPerSegment.json +146 -0
  663. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +93 -0
  664. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithMissingValues.json +76 -0
  665. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithoutMissingValues.json +76 -0
  666. teradataml/data/jsons/sqle/17.20/TD_Histogram.json +152 -0
  667. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +232 -0
  668. teradataml/data/jsons/sqle/17.20/TD_KMeansPredict.json +87 -0
  669. teradataml/data/jsons/sqle/17.20/TD_KNN.json +262 -0
  670. teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
  671. teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
  672. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
  673. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesTextClassifierTrainer.json +137 -0
  674. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +102 -0
  675. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineTransform.json +71 -0
  676. teradataml/data/jsons/sqle/17.20/TD_NumApply.json +147 -0
  677. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +316 -0
  678. teradataml/data/jsons/sqle/17.20/TD_OneClassSVMPredict.json +124 -0
  679. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingFit.json +271 -0
  680. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingTransform.json +65 -0
  681. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingFit.json +229 -0
  682. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingTransform.json +75 -0
  683. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterFit.json +217 -0
  684. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterTransform.json +48 -0
  685. teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
  686. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesFit.json +114 -0
  687. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesTransform.json +72 -0
  688. teradataml/data/jsons/sqle/17.20/TD_QQNorm.json +111 -0
  689. teradataml/data/jsons/sqle/17.20/TD_ROC.json +179 -0
  690. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionFit.json +179 -0
  691. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionMinComponents.json +74 -0
  692. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionTransform.json +74 -0
  693. teradataml/data/jsons/sqle/17.20/TD_RegressionEvaluator.json +138 -0
  694. teradataml/data/jsons/sqle/17.20/TD_RoundColumns.json +93 -0
  695. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeFit.json +128 -0
  696. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeTransform.json +71 -0
  697. teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
  698. teradataml/data/jsons/sqle/17.20/TD_SVM.json +389 -0
  699. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +142 -0
  700. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +310 -0
  701. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +120 -0
  702. teradataml/data/jsons/sqle/17.20/TD_SentimentExtractor.json +194 -0
  703. teradataml/data/jsons/sqle/17.20/TD_Shap.json +221 -0
  704. teradataml/data/jsons/sqle/17.20/TD_Silhouette.json +143 -0
  705. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeFit.json +147 -0
  706. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeTransform.json +48 -0
  707. teradataml/data/jsons/sqle/17.20/TD_StrApply.json +240 -0
  708. teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
  709. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingFit.json +248 -0
  710. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingTransform.json +75 -0
  711. teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
  712. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +297 -0
  713. teradataml/data/jsons/sqle/17.20/TD_TrainTestSplit.json +142 -0
  714. teradataml/data/jsons/sqle/17.20/TD_UnivariateStatistics.json +117 -0
  715. teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
  716. teradataml/data/jsons/sqle/17.20/TD_VectorDistance.json +183 -0
  717. teradataml/data/jsons/sqle/17.20/TD_WhichMax.json +53 -0
  718. teradataml/data/jsons/sqle/17.20/TD_WhichMin.json +53 -0
  719. teradataml/data/jsons/sqle/17.20/TD_WordEmbeddings.json +241 -0
  720. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +330 -0
  721. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +195 -0
  722. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +247 -0
  723. teradataml/data/jsons/sqle/17.20/Unpack.json +188 -0
  724. teradataml/data/jsons/sqle/17.20/nPath.json +269 -0
  725. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +370 -0
  726. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +460 -0
  727. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +385 -0
  728. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +369 -0
  729. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +369 -0
  730. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +369 -0
  731. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +369 -0
  732. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +400 -0
  733. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +401 -0
  734. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +384 -0
  735. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +384 -0
  736. teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
  737. teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
  738. teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
  739. teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
  740. teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
  741. teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
  742. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  743. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  744. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  745. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  746. teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
  747. teradataml/data/jsons/tableoperator/17.00/read_nos.json +198 -0
  748. teradataml/data/jsons/tableoperator/17.05/read_nos.json +198 -0
  749. teradataml/data/jsons/tableoperator/17.05/write_nos.json +195 -0
  750. teradataml/data/jsons/tableoperator/17.10/read_nos.json +184 -0
  751. teradataml/data/jsons/tableoperator/17.10/write_nos.json +195 -0
  752. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  753. teradataml/data/jsons/tableoperator/17.20/read_nos.json +183 -0
  754. teradataml/data/jsons/tableoperator/17.20/write_nos.json +224 -0
  755. teradataml/data/jsons/uaf/17.20/TD_ACF.json +132 -0
  756. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +396 -0
  757. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +77 -0
  758. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +153 -0
  759. teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
  760. teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
  761. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +107 -0
  762. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +106 -0
  763. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +89 -0
  764. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +104 -0
  765. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +78 -0
  766. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +66 -0
  767. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +87 -0
  768. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +134 -0
  769. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +144 -0
  770. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +108 -0
  771. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +108 -0
  772. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +78 -0
  773. teradataml/data/jsons/uaf/17.20/TD_DIFF.json +92 -0
  774. teradataml/data/jsons/uaf/17.20/TD_DTW.json +114 -0
  775. teradataml/data/jsons/uaf/17.20/TD_DURBIN_WATSON.json +101 -0
  776. teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
  777. teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
  778. teradataml/data/jsons/uaf/17.20/TD_EXTRACT_RESULTS.json +39 -0
  779. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +101 -0
  780. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4FORMULA.json +85 -0
  781. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4SINUSOIDS.json +71 -0
  782. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +139 -0
  783. teradataml/data/jsons/uaf/17.20/TD_HOLT_WINTERS_FORECASTER.json +313 -0
  784. teradataml/data/jsons/uaf/17.20/TD_IDFFT.json +58 -0
  785. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +81 -0
  786. teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
  787. teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
  788. teradataml/data/jsons/uaf/17.20/TD_INPUTVALIDATOR.json +64 -0
  789. teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
  790. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +182 -0
  791. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +103 -0
  792. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +181 -0
  793. teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
  794. teradataml/data/jsons/uaf/17.20/TD_MATRIXMULTIPLY.json +68 -0
  795. teradataml/data/jsons/uaf/17.20/TD_MINFO.json +67 -0
  796. teradataml/data/jsons/uaf/17.20/TD_MULTIVAR_REGR.json +179 -0
  797. teradataml/data/jsons/uaf/17.20/TD_PACF.json +114 -0
  798. teradataml/data/jsons/uaf/17.20/TD_PORTMAN.json +119 -0
  799. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +175 -0
  800. teradataml/data/jsons/uaf/17.20/TD_POWERTRANSFORM.json +98 -0
  801. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +194 -0
  802. teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
  803. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +143 -0
  804. teradataml/data/jsons/uaf/17.20/TD_SELECTION_CRITERIA.json +90 -0
  805. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_PERIODICITIES.json +80 -0
  806. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_RESIDMEAN.json +68 -0
  807. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +184 -0
  808. teradataml/data/jsons/uaf/17.20/TD_SINFO.json +58 -0
  809. teradataml/data/jsons/uaf/17.20/TD_SMOOTHMA.json +163 -0
  810. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +101 -0
  811. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +112 -0
  812. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +95 -0
  813. teradataml/data/jsons/uaf/17.20/TD_WHITES_GENERAL.json +78 -0
  814. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
  815. teradataml/data/kmeans_example.json +23 -0
  816. teradataml/data/kmeans_table.csv +10 -0
  817. teradataml/data/kmeans_us_arrests_data.csv +51 -0
  818. teradataml/data/knn_example.json +19 -0
  819. teradataml/data/knnrecommender_example.json +7 -0
  820. teradataml/data/knnrecommenderpredict_example.json +12 -0
  821. teradataml/data/lar_example.json +17 -0
  822. teradataml/data/larpredict_example.json +30 -0
  823. teradataml/data/lc_new_predictors.csv +5 -0
  824. teradataml/data/lc_new_reference.csv +9 -0
  825. teradataml/data/lda_example.json +9 -0
  826. teradataml/data/ldainference_example.json +15 -0
  827. teradataml/data/ldatopicsummary_example.json +9 -0
  828. teradataml/data/levendist_input.csv +13 -0
  829. teradataml/data/levenshteindistance_example.json +10 -0
  830. teradataml/data/linreg_example.json +10 -0
  831. teradataml/data/load_example_data.py +350 -0
  832. teradataml/data/loan_prediction.csv +295 -0
  833. teradataml/data/lungcancer.csv +138 -0
  834. teradataml/data/mappingdata.csv +12 -0
  835. teradataml/data/medical_readings.csv +101 -0
  836. teradataml/data/milk_timeseries.csv +157 -0
  837. teradataml/data/min_max_titanic.csv +4 -0
  838. teradataml/data/minhash_example.json +6 -0
  839. teradataml/data/ml_ratings.csv +7547 -0
  840. teradataml/data/ml_ratings_10.csv +2445 -0
  841. teradataml/data/mobile_data.csv +13 -0
  842. teradataml/data/model1_table.csv +5 -0
  843. teradataml/data/model2_table.csv +5 -0
  844. teradataml/data/models/License_file.txt +1 -0
  845. teradataml/data/models/License_file_empty.txt +0 -0
  846. teradataml/data/models/dataiku_iris_data_ann_thin +0 -0
  847. teradataml/data/models/dr_iris_rf +0 -0
  848. teradataml/data/models/iris_db_dt_model_sklearn.onnx +0 -0
  849. teradataml/data/models/iris_db_dt_model_sklearn_floattensor.onnx +0 -0
  850. teradataml/data/models/iris_db_glm_model.pmml +57 -0
  851. teradataml/data/models/iris_db_xgb_model.pmml +4471 -0
  852. teradataml/data/models/iris_kmeans_model +0 -0
  853. teradataml/data/models/iris_mojo_glm_h2o_model +0 -0
  854. teradataml/data/models/iris_mojo_xgb_h2o_model +0 -0
  855. teradataml/data/modularity_example.json +12 -0
  856. teradataml/data/movavg_example.json +8 -0
  857. teradataml/data/mtx1.csv +7 -0
  858. teradataml/data/mtx2.csv +13 -0
  859. teradataml/data/multi_model_classification.csv +401 -0
  860. teradataml/data/multi_model_regression.csv +401 -0
  861. teradataml/data/mvdfft8.csv +9 -0
  862. teradataml/data/naivebayes_example.json +10 -0
  863. teradataml/data/naivebayespredict_example.json +19 -0
  864. teradataml/data/naivebayestextclassifier2_example.json +7 -0
  865. teradataml/data/naivebayestextclassifier_example.json +8 -0
  866. teradataml/data/naivebayestextclassifierpredict_example.json +32 -0
  867. teradataml/data/name_Find_configure.csv +10 -0
  868. teradataml/data/namedentityfinder_example.json +14 -0
  869. teradataml/data/namedentityfinderevaluator_example.json +10 -0
  870. teradataml/data/namedentityfindertrainer_example.json +6 -0
  871. teradataml/data/nb_iris_input_test.csv +31 -0
  872. teradataml/data/nb_iris_input_train.csv +121 -0
  873. teradataml/data/nbp_iris_model.csv +13 -0
  874. teradataml/data/ner_dict.csv +8 -0
  875. teradataml/data/ner_extractor_text.csv +2 -0
  876. teradataml/data/ner_input_eng.csv +7 -0
  877. teradataml/data/ner_rule.csv +5 -0
  878. teradataml/data/ner_sports_test2.csv +29 -0
  879. teradataml/data/ner_sports_train.csv +501 -0
  880. teradataml/data/nerevaluator_example.json +6 -0
  881. teradataml/data/nerextractor_example.json +18 -0
  882. teradataml/data/nermem_sports_test.csv +18 -0
  883. teradataml/data/nermem_sports_train.csv +51 -0
  884. teradataml/data/nertrainer_example.json +7 -0
  885. teradataml/data/ngrams_example.json +7 -0
  886. teradataml/data/notebooks/__init__.py +0 -0
  887. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Aggregate Functions using SQLAlchemy.ipynb +1455 -0
  888. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Arithmetic Functions Using SQLAlchemy.ipynb +1993 -0
  889. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Bit-Byte Manipulation Functions using SQLAlchemy.ipynb +1492 -0
  890. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Built-in functions using SQLAlchemy.ipynb +536 -0
  891. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Regular Expressions Using SQLAlchemy.ipynb +570 -0
  892. teradataml/data/notebooks/sqlalchemy/Teradata Vantage String Functions Using SQLAlchemy.ipynb +2559 -0
  893. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Window Aggregate Functions using SQLAlchemy.ipynb +2911 -0
  894. teradataml/data/notebooks/sqlalchemy/Using Generic SQLAlchemy ClauseElements teradataml DataFrame assign method.ipynb +698 -0
  895. teradataml/data/notebooks/sqlalchemy/__init__.py +0 -0
  896. teradataml/data/notebooks/sqlalchemy/teradataml filtering using SQLAlchemy ClauseElements.ipynb +784 -0
  897. teradataml/data/npath_example.json +23 -0
  898. teradataml/data/ntree_example.json +14 -0
  899. teradataml/data/numeric_strings.csv +5 -0
  900. teradataml/data/numerics.csv +4 -0
  901. teradataml/data/ocean_buoy.csv +17 -0
  902. teradataml/data/ocean_buoy2.csv +17 -0
  903. teradataml/data/ocean_buoys.csv +28 -0
  904. teradataml/data/ocean_buoys2.csv +10 -0
  905. teradataml/data/ocean_buoys_nonpti.csv +28 -0
  906. teradataml/data/ocean_buoys_seq.csv +29 -0
  907. teradataml/data/onehot_encoder_train.csv +4 -0
  908. teradataml/data/openml_example.json +92 -0
  909. teradataml/data/optional_event_table.csv +4 -0
  910. teradataml/data/orders1.csv +11 -0
  911. teradataml/data/orders1_12.csv +13 -0
  912. teradataml/data/orders_ex.csv +4 -0
  913. teradataml/data/pack_example.json +9 -0
  914. teradataml/data/package_tracking.csv +19 -0
  915. teradataml/data/package_tracking_pti.csv +19 -0
  916. teradataml/data/pagerank_example.json +13 -0
  917. teradataml/data/paragraphs_input.csv +6 -0
  918. teradataml/data/pathanalyzer_example.json +8 -0
  919. teradataml/data/pathgenerator_example.json +8 -0
  920. teradataml/data/patient_profile.csv +101 -0
  921. teradataml/data/pattern_matching_data.csv +11 -0
  922. teradataml/data/payment_fraud_dataset.csv +10001 -0
  923. teradataml/data/peppers.png +0 -0
  924. teradataml/data/phrases.csv +7 -0
  925. teradataml/data/pivot_example.json +9 -0
  926. teradataml/data/pivot_input.csv +22 -0
  927. teradataml/data/playerRating.csv +31 -0
  928. teradataml/data/pos_input.csv +40 -0
  929. teradataml/data/postagger_example.json +7 -0
  930. teradataml/data/posttagger_output.csv +44 -0
  931. teradataml/data/production_data.csv +17 -0
  932. teradataml/data/production_data2.csv +7 -0
  933. teradataml/data/randomsample_example.json +32 -0
  934. teradataml/data/randomwalksample_example.json +9 -0
  935. teradataml/data/rank_table.csv +6 -0
  936. teradataml/data/real_values.csv +14 -0
  937. teradataml/data/ref_mobile_data.csv +4 -0
  938. teradataml/data/ref_mobile_data_dense.csv +2 -0
  939. teradataml/data/ref_url.csv +17 -0
  940. teradataml/data/restaurant_reviews.csv +7 -0
  941. teradataml/data/retail_churn_table.csv +27772 -0
  942. teradataml/data/river_data.csv +145 -0
  943. teradataml/data/roc_example.json +8 -0
  944. teradataml/data/roc_input.csv +101 -0
  945. teradataml/data/rule_inputs.csv +6 -0
  946. teradataml/data/rule_table.csv +2 -0
  947. teradataml/data/sales.csv +7 -0
  948. teradataml/data/sales_transaction.csv +501 -0
  949. teradataml/data/salesdata.csv +342 -0
  950. teradataml/data/sample_cities.csv +3 -0
  951. teradataml/data/sample_shapes.csv +11 -0
  952. teradataml/data/sample_streets.csv +3 -0
  953. teradataml/data/sampling_example.json +16 -0
  954. teradataml/data/sax_example.json +17 -0
  955. teradataml/data/scale_attributes.csv +3 -0
  956. teradataml/data/scale_example.json +74 -0
  957. teradataml/data/scale_housing.csv +11 -0
  958. teradataml/data/scale_housing_test.csv +6 -0
  959. teradataml/data/scale_input_part_sparse.csv +31 -0
  960. teradataml/data/scale_input_partitioned.csv +16 -0
  961. teradataml/data/scale_input_sparse.csv +11 -0
  962. teradataml/data/scale_parameters.csv +3 -0
  963. teradataml/data/scale_stat.csv +11 -0
  964. teradataml/data/scalebypartition_example.json +13 -0
  965. teradataml/data/scalemap_example.json +13 -0
  966. teradataml/data/scalesummary_example.json +12 -0
  967. teradataml/data/score_category.csv +101 -0
  968. teradataml/data/score_summary.csv +4 -0
  969. teradataml/data/script_example.json +10 -0
  970. teradataml/data/scripts/deploy_script.py +84 -0
  971. teradataml/data/scripts/lightgbm/dataset.template +175 -0
  972. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +264 -0
  973. teradataml/data/scripts/lightgbm/lightgbm_function.template +234 -0
  974. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +177 -0
  975. teradataml/data/scripts/mapper.R +20 -0
  976. teradataml/data/scripts/mapper.py +16 -0
  977. teradataml/data/scripts/mapper_replace.py +16 -0
  978. teradataml/data/scripts/sklearn/__init__.py +0 -0
  979. teradataml/data/scripts/sklearn/sklearn_fit.py +205 -0
  980. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +148 -0
  981. teradataml/data/scripts/sklearn/sklearn_function.template +144 -0
  982. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +166 -0
  983. teradataml/data/scripts/sklearn/sklearn_neighbors.py +161 -0
  984. teradataml/data/scripts/sklearn/sklearn_score.py +145 -0
  985. teradataml/data/scripts/sklearn/sklearn_transform.py +327 -0
  986. teradataml/data/sdk/modelops/modelops_spec.json +101737 -0
  987. teradataml/data/seeds.csv +10 -0
  988. teradataml/data/sentenceextractor_example.json +7 -0
  989. teradataml/data/sentiment_extract_input.csv +11 -0
  990. teradataml/data/sentiment_train.csv +16 -0
  991. teradataml/data/sentiment_word.csv +20 -0
  992. teradataml/data/sentiment_word_input.csv +20 -0
  993. teradataml/data/sentimentextractor_example.json +24 -0
  994. teradataml/data/sentimenttrainer_example.json +8 -0
  995. teradataml/data/sequence_table.csv +10 -0
  996. teradataml/data/seriessplitter_example.json +8 -0
  997. teradataml/data/sessionize_example.json +17 -0
  998. teradataml/data/sessionize_table.csv +116 -0
  999. teradataml/data/setop_test1.csv +24 -0
  1000. teradataml/data/setop_test2.csv +22 -0
  1001. teradataml/data/soc_nw_edges.csv +11 -0
  1002. teradataml/data/soc_nw_vertices.csv +8 -0
  1003. teradataml/data/souvenir_timeseries.csv +168 -0
  1004. teradataml/data/sparse_iris_attribute.csv +5 -0
  1005. teradataml/data/sparse_iris_test.csv +121 -0
  1006. teradataml/data/sparse_iris_train.csv +601 -0
  1007. teradataml/data/star1.csv +6 -0
  1008. teradataml/data/star_pivot.csv +8 -0
  1009. teradataml/data/state_transition.csv +5 -0
  1010. teradataml/data/stock_data.csv +53 -0
  1011. teradataml/data/stock_movement.csv +11 -0
  1012. teradataml/data/stock_vol.csv +76 -0
  1013. teradataml/data/stop_words.csv +8 -0
  1014. teradataml/data/store_sales.csv +37 -0
  1015. teradataml/data/stringsimilarity_example.json +8 -0
  1016. teradataml/data/strsimilarity_input.csv +13 -0
  1017. teradataml/data/students.csv +101 -0
  1018. teradataml/data/svm_iris_input_test.csv +121 -0
  1019. teradataml/data/svm_iris_input_train.csv +481 -0
  1020. teradataml/data/svm_iris_model.csv +7 -0
  1021. teradataml/data/svmdense_example.json +10 -0
  1022. teradataml/data/svmdensepredict_example.json +19 -0
  1023. teradataml/data/svmsparse_example.json +8 -0
  1024. teradataml/data/svmsparsepredict_example.json +14 -0
  1025. teradataml/data/svmsparsesummary_example.json +8 -0
  1026. teradataml/data/target_mobile_data.csv +13 -0
  1027. teradataml/data/target_mobile_data_dense.csv +5 -0
  1028. teradataml/data/target_udt_data.csv +8 -0
  1029. teradataml/data/tdnerextractor_example.json +14 -0
  1030. teradataml/data/templatedata.csv +1201 -0
  1031. teradataml/data/templates/open_source_ml.json +11 -0
  1032. teradataml/data/teradata_icon.ico +0 -0
  1033. teradataml/data/teradataml_example.json +1473 -0
  1034. teradataml/data/test_classification.csv +101 -0
  1035. teradataml/data/test_loan_prediction.csv +53 -0
  1036. teradataml/data/test_pacf_12.csv +37 -0
  1037. teradataml/data/test_prediction.csv +101 -0
  1038. teradataml/data/test_regression.csv +101 -0
  1039. teradataml/data/test_river2.csv +109 -0
  1040. teradataml/data/text_inputs.csv +6 -0
  1041. teradataml/data/textchunker_example.json +8 -0
  1042. teradataml/data/textclassifier_example.json +7 -0
  1043. teradataml/data/textclassifier_input.csv +7 -0
  1044. teradataml/data/textclassifiertrainer_example.json +7 -0
  1045. teradataml/data/textmorph_example.json +11 -0
  1046. teradataml/data/textparser_example.json +15 -0
  1047. teradataml/data/texttagger_example.json +12 -0
  1048. teradataml/data/texttokenizer_example.json +7 -0
  1049. teradataml/data/texttrainer_input.csv +11 -0
  1050. teradataml/data/tf_example.json +7 -0
  1051. teradataml/data/tfidf_example.json +14 -0
  1052. teradataml/data/tfidf_input1.csv +201 -0
  1053. teradataml/data/tfidf_train.csv +6 -0
  1054. teradataml/data/time_table1.csv +535 -0
  1055. teradataml/data/time_table2.csv +14 -0
  1056. teradataml/data/timeseriesdata.csv +1601 -0
  1057. teradataml/data/timeseriesdatasetsd4.csv +105 -0
  1058. teradataml/data/timestamp_data.csv +4 -0
  1059. teradataml/data/titanic.csv +892 -0
  1060. teradataml/data/titanic_dataset_unpivoted.csv +19 -0
  1061. teradataml/data/to_num_data.csv +4 -0
  1062. teradataml/data/tochar_data.csv +5 -0
  1063. teradataml/data/token_table.csv +696 -0
  1064. teradataml/data/train_multiclass.csv +101 -0
  1065. teradataml/data/train_regression.csv +101 -0
  1066. teradataml/data/train_regression_multiple_labels.csv +101 -0
  1067. teradataml/data/train_tracking.csv +28 -0
  1068. teradataml/data/trans_dense.csv +16 -0
  1069. teradataml/data/trans_sparse.csv +55 -0
  1070. teradataml/data/transformation_table.csv +6 -0
  1071. teradataml/data/transformation_table_new.csv +2 -0
  1072. teradataml/data/tv_spots.csv +16 -0
  1073. teradataml/data/twod_climate_data.csv +117 -0
  1074. teradataml/data/uaf_example.json +529 -0
  1075. teradataml/data/univariatestatistics_example.json +9 -0
  1076. teradataml/data/unpack_example.json +10 -0
  1077. teradataml/data/unpivot_example.json +25 -0
  1078. teradataml/data/unpivot_input.csv +8 -0
  1079. teradataml/data/url_data.csv +10 -0
  1080. teradataml/data/us_air_pass.csv +37 -0
  1081. teradataml/data/us_population.csv +624 -0
  1082. teradataml/data/us_states_shapes.csv +52 -0
  1083. teradataml/data/varmax_example.json +18 -0
  1084. teradataml/data/vectordistance_example.json +30 -0
  1085. teradataml/data/ville_climatedata.csv +121 -0
  1086. teradataml/data/ville_tempdata.csv +12 -0
  1087. teradataml/data/ville_tempdata1.csv +12 -0
  1088. teradataml/data/ville_temperature.csv +11 -0
  1089. teradataml/data/waveletTable.csv +1605 -0
  1090. teradataml/data/waveletTable2.csv +1605 -0
  1091. teradataml/data/weightedmovavg_example.json +9 -0
  1092. teradataml/data/wft_testing.csv +5 -0
  1093. teradataml/data/windowdfft.csv +16 -0
  1094. teradataml/data/wine_data.csv +1600 -0
  1095. teradataml/data/word_embed_input_table1.csv +6 -0
  1096. teradataml/data/word_embed_input_table2.csv +5 -0
  1097. teradataml/data/word_embed_model.csv +23 -0
  1098. teradataml/data/words_input.csv +13 -0
  1099. teradataml/data/xconvolve_complex_left.csv +6 -0
  1100. teradataml/data/xconvolve_complex_leftmulti.csv +6 -0
  1101. teradataml/data/xgboost_example.json +36 -0
  1102. teradataml/data/xgboostpredict_example.json +32 -0
  1103. teradataml/data/ztest_example.json +16 -0
  1104. teradataml/dataframe/__init__.py +0 -0
  1105. teradataml/dataframe/copy_to.py +2446 -0
  1106. teradataml/dataframe/data_transfer.py +2840 -0
  1107. teradataml/dataframe/dataframe.py +20908 -0
  1108. teradataml/dataframe/dataframe_utils.py +2114 -0
  1109. teradataml/dataframe/fastload.py +794 -0
  1110. teradataml/dataframe/functions.py +2110 -0
  1111. teradataml/dataframe/indexer.py +424 -0
  1112. teradataml/dataframe/row.py +160 -0
  1113. teradataml/dataframe/setop.py +1171 -0
  1114. teradataml/dataframe/sql.py +10904 -0
  1115. teradataml/dataframe/sql_function_parameters.py +440 -0
  1116. teradataml/dataframe/sql_functions.py +652 -0
  1117. teradataml/dataframe/sql_interfaces.py +220 -0
  1118. teradataml/dataframe/vantage_function_types.py +675 -0
  1119. teradataml/dataframe/window.py +694 -0
  1120. teradataml/dbutils/__init__.py +3 -0
  1121. teradataml/dbutils/dbutils.py +2871 -0
  1122. teradataml/dbutils/filemgr.py +318 -0
  1123. teradataml/gen_ai/__init__.py +2 -0
  1124. teradataml/gen_ai/convAI.py +473 -0
  1125. teradataml/geospatial/__init__.py +4 -0
  1126. teradataml/geospatial/geodataframe.py +1105 -0
  1127. teradataml/geospatial/geodataframecolumn.py +392 -0
  1128. teradataml/geospatial/geometry_types.py +926 -0
  1129. teradataml/hyperparameter_tuner/__init__.py +1 -0
  1130. teradataml/hyperparameter_tuner/optimizer.py +4115 -0
  1131. teradataml/hyperparameter_tuner/utils.py +303 -0
  1132. teradataml/lib/__init__.py +0 -0
  1133. teradataml/lib/aed_0_1.dll +0 -0
  1134. teradataml/lib/libaed_0_1.dylib +0 -0
  1135. teradataml/lib/libaed_0_1.so +0 -0
  1136. teradataml/lib/libaed_0_1_aarch64.so +0 -0
  1137. teradataml/lib/libaed_0_1_ppc64le.so +0 -0
  1138. teradataml/opensource/__init__.py +1 -0
  1139. teradataml/opensource/_base.py +1321 -0
  1140. teradataml/opensource/_class.py +464 -0
  1141. teradataml/opensource/_constants.py +61 -0
  1142. teradataml/opensource/_lightgbm.py +949 -0
  1143. teradataml/opensource/_sklearn.py +1008 -0
  1144. teradataml/opensource/_wrapper_utils.py +267 -0
  1145. teradataml/options/__init__.py +148 -0
  1146. teradataml/options/configure.py +489 -0
  1147. teradataml/options/display.py +187 -0
  1148. teradataml/plot/__init__.py +3 -0
  1149. teradataml/plot/axis.py +1427 -0
  1150. teradataml/plot/constants.py +15 -0
  1151. teradataml/plot/figure.py +431 -0
  1152. teradataml/plot/plot.py +810 -0
  1153. teradataml/plot/query_generator.py +83 -0
  1154. teradataml/plot/subplot.py +216 -0
  1155. teradataml/scriptmgmt/UserEnv.py +4273 -0
  1156. teradataml/scriptmgmt/__init__.py +3 -0
  1157. teradataml/scriptmgmt/lls_utils.py +2157 -0
  1158. teradataml/sdk/README.md +79 -0
  1159. teradataml/sdk/__init__.py +4 -0
  1160. teradataml/sdk/_auth_modes.py +422 -0
  1161. teradataml/sdk/_func_params.py +487 -0
  1162. teradataml/sdk/_json_parser.py +453 -0
  1163. teradataml/sdk/_openapi_spec_constants.py +249 -0
  1164. teradataml/sdk/_utils.py +236 -0
  1165. teradataml/sdk/api_client.py +900 -0
  1166. teradataml/sdk/constants.py +62 -0
  1167. teradataml/sdk/modelops/__init__.py +98 -0
  1168. teradataml/sdk/modelops/_client.py +409 -0
  1169. teradataml/sdk/modelops/_constants.py +304 -0
  1170. teradataml/sdk/modelops/models.py +2308 -0
  1171. teradataml/sdk/spinner.py +107 -0
  1172. teradataml/series/__init__.py +0 -0
  1173. teradataml/series/series.py +537 -0
  1174. teradataml/series/series_utils.py +71 -0
  1175. teradataml/store/__init__.py +12 -0
  1176. teradataml/store/feature_store/__init__.py +0 -0
  1177. teradataml/store/feature_store/constants.py +658 -0
  1178. teradataml/store/feature_store/feature_store.py +4814 -0
  1179. teradataml/store/feature_store/mind_map.py +639 -0
  1180. teradataml/store/feature_store/models.py +7330 -0
  1181. teradataml/store/feature_store/utils.py +390 -0
  1182. teradataml/table_operators/Apply.py +979 -0
  1183. teradataml/table_operators/Script.py +1739 -0
  1184. teradataml/table_operators/TableOperator.py +1343 -0
  1185. teradataml/table_operators/__init__.py +2 -0
  1186. teradataml/table_operators/apply_query_generator.py +262 -0
  1187. teradataml/table_operators/query_generator.py +493 -0
  1188. teradataml/table_operators/table_operator_query_generator.py +462 -0
  1189. teradataml/table_operators/table_operator_util.py +726 -0
  1190. teradataml/table_operators/templates/dataframe_apply.template +184 -0
  1191. teradataml/table_operators/templates/dataframe_map.template +176 -0
  1192. teradataml/table_operators/templates/dataframe_register.template +73 -0
  1193. teradataml/table_operators/templates/dataframe_udf.template +67 -0
  1194. teradataml/table_operators/templates/script_executor.template +170 -0
  1195. teradataml/telemetry_utils/__init__.py +0 -0
  1196. teradataml/telemetry_utils/queryband.py +53 -0
  1197. teradataml/utils/__init__.py +0 -0
  1198. teradataml/utils/docstring.py +527 -0
  1199. teradataml/utils/dtypes.py +943 -0
  1200. teradataml/utils/internal_buffer.py +122 -0
  1201. teradataml/utils/print_versions.py +206 -0
  1202. teradataml/utils/utils.py +451 -0
  1203. teradataml/utils/validators.py +3305 -0
  1204. teradataml-20.0.0.8.dist-info/METADATA +2804 -0
  1205. teradataml-20.0.0.8.dist-info/RECORD +1208 -0
  1206. teradataml-20.0.0.8.dist-info/WHEEL +5 -0
  1207. teradataml-20.0.0.8.dist-info/top_level.txt +1 -0
  1208. teradataml-20.0.0.8.dist-info/zip-safe +1 -0
@@ -0,0 +1,2273 @@
+ # ##################################################################
+ #
+ # Copyright 2025 Teradata. All rights reserved.
+ # TERADATA CONFIDENTIAL AND TRADE SECRET
+ #
+ # Primary Owner: Sweta Shaw
+ # Email Id: Sweta.Shaw@Teradata.com
+ #
+ # Secondary Owner: Akhil Bisht
+ # Email Id: AKHIL.BISHT@Teradata.com
+ #
+ # Version: 1.1
+ # Function Version: 1.0
+ # ##################################################################
+
+ # Python libraries
+ import pandas as pd
+ import time
+ import json
+ import re
+
+ # Teradata libraries
+ from teradataml.dataframe.dataframe import DataFrame
+ from teradataml.dataframe.copy_to import copy_to_sql
+ from teradataml import Antiselect
+ from teradataml import BincodeFit, BincodeTransform
+ from teradataml import CategoricalSummary, ColumnSummary, ConvertTo, GetFutileColumns, FillRowId
+ from teradataml import Fit, Transform
+ from teradataml import NonLinearCombineFit, NonLinearCombineTransform
+ from teradataml import NumApply
+ from teradataml import OneHotEncodingFit, OneHotEncodingTransform
+ from teradataml import OrdinalEncodingFit, OrdinalEncodingTransform
+ from teradataml import SimpleImputeFit, SimpleImputeTransform
+ from teradataml import StrApply
+ from teradataml import TargetEncodingFit, TargetEncodingTransform
+ from sqlalchemy import literal_column
+ from teradatasqlalchemy import INTEGER
+ from teradataml import display
+ from teradataml.common.garbagecollector import GarbageCollector
+ from teradataml.dataframe.sql_functions import case
+ from teradataml.hyperparameter_tuner.utils import _ProgressBar
+ from teradataml.utils.validators import _Validators
+ from teradataml.common.utils import UtilFuncs
+ from teradataml.common.constants import TeradataConstants
+ from teradataml.options.configure import configure
+
+
+ class _FeatureEngineering:
+
+     def __init__(self,
+                  data,
+                  target_column,
+                  id_column,
+                  model_list,
+                  verbose=0,
+                  task_type="Regression",
+                  custom_data=None,
+                  **kwargs):
+         """
+         DESCRIPTION:
+             Function initializes the data, target column and column datatypes
+             for feature engineering.
+
+         PARAMETERS:
+             data:
+                 Required Argument.
+                 Specifies the input teradataml DataFrame for feature engineering.
+                 Types: teradataml DataFrame
+
+             target_column:
+                 Required Argument.
+                 Specifies the name of the target column in "data".
+                 Types: str
+
+             id_column:
+                 Required Argument.
+                 Specifies the name of the unique identifier column in "data".
+                 Types: str
+
+             model_list:
+                 Required Argument.
+                 Specifies the list of models to be used for model training.
+                 Types: list
+
+             verbose:
+                 Optional Argument.
+                 Specifies the detailed execution steps based on verbose level.
+                 Default Value: 0
+                 Permitted Values:
+                     * 0: prints the progress bar and leaderboard.
+                     * 1: prints the execution steps of AutoML.
+                     * 2: prints the intermediate data between the execution of each step of AutoML.
+                 Types: int
+
+             task_type:
+                 Required Argument.
+                 Specifies the task type for AutoML, that is, whether to apply regression,
+                 classification, or clustering on the provided dataset.
+                 Default Value: "Regression"
+                 Permitted Values: "Regression", "Classification", "Clustering"
+                 Types: str
+
+             custom_data:
+                 Optional Argument.
+                 Specifies the JSON object containing user customized input.
+                 Types: json object
+
+             **kwargs:
+                 Specifies the additional arguments for feature engineering. Below
+                 are the additional arguments:
+                     volatile:
+                         Optional Argument.
+                         Specifies whether to put the interim results of the
+                         functions in a volatile table or not. When set to
+                         True, results are stored in a volatile table,
+                         otherwise not.
+                         Default Value: False
+                         Types: bool
+
+                     persist:
+                         Optional Argument.
+                         Specifies whether to persist the interim results of the
+                         functions in a table or not. When set to True,
+                         results are persisted in a table; otherwise,
+                         results are garbage collected at the end of the
+                         session.
+                         Default Value: False
+                         Types: bool
+
+                     cluster:
+                         Optional Argument.
+                         Specifies whether to apply clustering techniques.
+                         Default Value: False
+                         Types: bool
+
+                     progress_prefix:
+                         Optional Argument.
+                         Specifies the prefix for the progress bar messages.
+                         Default Value: None
+                         Types: str
+
+                     automl_phases:
+                         Optional Argument.
+                         Specifies the phase of AutoML to be executed.
+                         Default Value: None
+                         Types: str or list of str
+
+                     auto_dataprep:
+                         Optional Argument.
+                         Specifies whether to run the AutoDataPrep workflow.
+                         Default Value: False
+                         Types: bool
+
+                     enable_lasso:
+                         Optional Argument.
+                         Specifies whether to use lasso regression for feature selection.
+                         By default, only RFE and PCA are used for feature selection.
+                         Default Value: False
+                         Types: bool
+
+         RETURNS:
+             None
+
+         RAISES:
+             None
+
+         EXAMPLES:
+             >>> _FeatureEngineering(data=df, target_column="target",
+             ...                     id_column="id", model_list=["xgboost"], verbose=1)
+         """
+         # Instance variables
+         self.data = data
+         self.target_column = target_column
+         self.id_column = id_column
+         self.model_list = model_list
+         self.verbose = verbose
+         self.task_type = task_type
+         self.custom_data = custom_data
+         self.excluded_cols = []
+         self.data_types = {key: value for key, value in self.data._column_names_and_types}
+         self.target_label = None
+
+         self.one_hot_obj_count = 0
+         self.is_classification_type = lambda: self.task_type.upper() == 'CLASSIFICATION'
+         self.persist = kwargs.get('persist', False)
+         self.volatile = kwargs.get('volatile', False) or (configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE and self.persist is False)
+         self.cluster = kwargs.get('cluster', False)
+
+         self.data_mapping = {}
+         self.progress_prefix = kwargs.get('progress_prefix', None)
+         self.aml_phases = kwargs.get('automl_phases', None)
+         self.auto_dataprep = kwargs.get('auto_dataprep', False)
+         self.enable_lasso = kwargs.get('enable_lasso', False)
+
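+     # Note on the volatile/persist resolution in __init__ above: interim
+     # results go to volatile tables either when the caller asks for it
+     # explicitly or when configure.temp_object_type is set to volatile tables
+     # and persist was not requested; passing persist=True suppresses that
+     # session-level volatile default.
+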
+     # Method for doing feature engineering on data -> adding id, removing futile columns, imputation, encoding (one hot)
+     def feature_engineering(self,
+                             auto=True):
+         """
+         DESCRIPTION:
+             Function performs the following operations:
+                 1. Removes futile columns/features from the dataset.
+                 2. Detects the columns with missing values.
+                 3. Performs imputation on these columns with missing values.
+                 4. Detects categorical columns and performs encoding on those columns.
+
+         PARAMETERS:
+             auto:
+                 Optional Argument.
+                 Specifies whether to run AutoML in custom mode or auto mode.
+                 When set to False, runs in custom mode. Otherwise, by default runs in auto mode.
+                 Default Value: True
+                 Types: bool
+
+         RETURNS:
+             tuple containing the teradataml DataFrame, list of excluded columns,
+             target label information, data transformation dictionary, and data mapping dictionary.
+
+         RAISES:
+             TeradataMlException
+
+         EXAMPLES:
+             >>> data, excluded_cols, target_label, transform_dict, data_mapping = self.feature_engineering(auto=True)
+         """
+         # Assigning number of base jobs for progress bar.
+         if self.cluster:
+             base_jobs = 11 if auto else 15
+         else:
+             # Base jobs for supervised learning: add extra job when lasso selection is enabled
+             base_jobs = (12 if self.enable_lasso else 11) if auto else (17 if self.enable_lasso else 16)
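+         # For example, with cluster=False, enable_lasso=True and auto=True,
+         # base_jobs resolves to 12; with three models in model_list the
+         # progress bar below is then sized at 12 + 3 = 15 jobs.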
+
+         # Updating model list based on distinct value of target column for classification type
+         if self.is_classification_type():
+             if self.data.drop_duplicate(self.target_column).size > 2:
+                 unsupported_models = ['svm', 'glm']  # Models that don't support multiclass
+                 for model in unsupported_models:
+                     if model in self.model_list:
+                         self._display_msg(inline_msg="Multi-class classification is "
+                                                      "not supported by {} model. Skipping {} model."
+                                                      .format(model, model))
+                 self.model_list = [model for model in self.model_list if model not in unsupported_models]
+
+         # After filtering models like glm/svm due to multiclass
+         if not self.auto_dataprep:
+             _Validators._validate_non_empty_list_or_valid_selection(self.model_list, "List of models")
+
+         # Updating number of jobs for progress bar based on number of models.
+         jobs = base_jobs + len(self.model_list)
+         self.progress_bar = _ProgressBar(jobs=jobs,
+                                          verbose=2,
+                                          prefix=self.progress_prefix)
+
+         self._display_heading(phase=1,
+                               progress_bar=self.progress_bar,
+                               automl_phases=self.aml_phases)
+
+         self._display_msg(msg='Feature Engineering started ...',
+                           progress_bar=self.progress_bar)
+
+         # Storing target column to data transform dictionary.
+         # Setting target column for supervised learning; for clustering it will be None.
+         if not self.cluster:
+             self.data_transform_dict['data_target_column'] = self.target_column
+         else:
+             self.data_transform_dict['data_target_column'] = None
+
+         # Storing target column encoding indicator to data transform dictionary
+         if "target_col_encode_ind" not in self.data_transform_dict:
+             self.data_transform_dict["target_col_encode_ind"] = False
+
+         # Storing task type to data transform dictionary
+         if not self.cluster:
+             self.data_transform_dict['classification_type'] = self.is_classification_type()
+         else:
+             self.data_transform_dict['classification_type'] = False
+         # Storing params for performing one hot encoding
+         self.data_transform_dict['one_hot_encoding_fit_obj'] = {}
+         self.data_transform_dict['one_hot_encoding_drop_list'] = []
+
+         if auto:
+             self._remove_duplicate_rows()
+             self.progress_bar.update()
+
+             self._remove_futile_columns()
+             self.progress_bar.update()
+
+             self._handle_date_columns()
+             self.progress_bar.update()
+
+             self._handling_missing_value()
+             self.progress_bar.update()
+
+             self._impute_missing_value()
+             self.progress_bar.update()
+
+             self._encoding_categorical_columns()
+             self.progress_bar.update()
+
+         else:
+             self._remove_duplicate_rows()
+             self.progress_bar.update()
+
+             self._anti_select_columns()
+             self.progress_bar.update()
+
+             self._remove_futile_columns()
+             self.progress_bar.update()
+
+             self._handle_date_columns()
+             self.progress_bar.update()
+
+             self._custom_handling_missing_value()
+             self.progress_bar.update()
+
+             self._bin_code_transformation()
+             self.progress_bar.update()
+
+             self._string_manipulation()
+             self.progress_bar.update()
+
+             self._custom_categorical_encoding()
+             self.progress_bar.update()
+
+             self._mathematical_transformation()
+             self.progress_bar.update()
+
+             self._non_linear_transformation()
+             self.progress_bar.update()
+
+         return self.data, self.excluded_cols, self.target_label, self.data_transform_dict, self.data_mapping
+
+     def _extract_list(self,
+                       list1,
+                       list2):
+         """
+         DESCRIPTION:
+             Function to extract elements from list1 which are not present in list2.
+
+         PARAMETERS:
+             list1:
+                 Required Argument.
+                 Specifies the first list for extracting elements from.
+                 Types: list
+
+             list2:
+                 Required Argument.
+                 Specifies the second list, whose elements are excluded from the
+                 first list while extracting.
+                 Types: list
+
+         RETURNS:
+             list containing extracted elements.
+
+         RAISES:
+             None
+
+         EXAMPLES:
+             >>> result = self._extract_list(list1=["a", "b", "c"], list2=["b"])
+         """
+         # Ensure list1 and list2 are lists, default to empty list if None
+         if list1 is None:
+             list1 = []
+         if list2 is None:
+             list2 = []
+         new_lst = list(set(list1) - set(list2))
+         return new_lst
+
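+     # Note: because _extract_list goes through set(), the order of the
+     # returned elements is not guaranteed; _extract_list(["c", "a", "b"], ["b"])
+     # may yield ["a", "c"] rather than ["c", "a"]. Callers here appear to
+     # treat the result as an unordered collection of column names.
+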
+     def _remove_duplicate_rows(self):
+         """
+         DESCRIPTION:
+             Function to handle duplicate rows present in the dataset.
+
+         PARAMETERS:
+             None
+
+         RETURNS:
+             None
+
+         RAISES:
+             None
+
+         EXAMPLES:
+             >>> self._remove_duplicate_rows()
+         """
+         self._display_msg(msg="Handling duplicate records present in dataset ...",
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+         start_time = time.time()
+         rows = self.data.shape[0]
+         self.data = self.data.drop_duplicate(self.data.columns)
+         if rows != self.data.shape[0]:
+             self._display_msg(msg=f'Updated dataset sample after removing {rows-self.data.shape[0]} duplicate records:',
+                               data=self.data,
+                               progress_bar=self.progress_bar)
+             self._display_msg(inline_msg=f"Remaining Rows in the data: {self.data.shape[0]}\n"\
+                                          f"Remaining Columns in the data: {self.data.shape[1]}",
+                               progress_bar=self.progress_bar)
+         else:
+             self._display_msg(inline_msg="Analysis completed. No action taken.",
+                               progress_bar=self.progress_bar)
+
+         end_time = time.time()
+         self._display_msg(msg="Total time to handle duplicate records: {:.2f} sec ".format(end_time - start_time),
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+
+     def _get_distinct_count(self):
+         """
+         DESCRIPTION:
+             Function to get the distinct count for all features and store it in a dictionary for further use.
+
+         PARAMETERS:
+             None
+
+         RETURNS:
+             None
+
+         RAISES:
+             None
+
+         EXAMPLES:
+             >>> self._get_distinct_count()
+         """
+         # Count of distinct values in each column
+         counts = self.data.select(self.data.columns).count(distinct=True)
+
+         # Dict containing distinct value count of each column
+         self.counts_dict = next(counts.itertuples())._asdict()
+
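+     # The counts dictionary produced above uses the 'count_<column>' key
+     # naming relied on elsewhere; e.g. for columns ('age', 'city') one would
+     # expect {'count_age': 73, 'count_city': 12} (illustrative values).
+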
+     def _preprocess_data(self):
+         """
+         DESCRIPTION:
+             Function replaces the existing id column or adds a new id column, and
+             removes columns having a single distinct value in the dataset.
+
+         PARAMETERS:
+             None
+
+         RETURNS:
+             None
+
+         RAISES:
+             None
+
+         EXAMPLES:
+             >>> self._preprocess_data()
+         """
+         # Get distinct value count of each column
+         self._get_distinct_count()
+         # Columns to be removed if count of distinct values = 1
+         columns_to_be_removed = [col for col in self.data.columns if self.counts_dict[f'count_{col}'] == 1]
+         # Removing irrelevant columns
+         if len(columns_to_be_removed) != 0:
+             self.data = self.data.drop(columns_to_be_removed, axis=1)
+             # Storing irrelevant column list in data transform dictionary
+             self.data_transform_dict['drop_irrelevant_columns'] = columns_to_be_removed
+
+         if self.id_column == 'automl_id':
+             # Adding id column
+             obj = FillRowId(data=self.data, row_id_column='automl_id')
+             self.data = obj.result
+
+         # Storing id column to data transform dictionary
+         self.data_transform_dict['data_id_column'] = self.id_column
+
+     def _remove_futile_columns(self):
+         """
+         DESCRIPTION:
+             Function removes the futile columns from the dataset.
+
+         PARAMETERS:
+             None
+
+         RETURNS:
+             None
+
+         RAISES:
+             None
+
+         EXAMPLES:
+             >>> self._remove_futile_columns()
+         """
+         self._display_msg(msg="Handling less significant features from data ...",
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+         start_time = time.time()
+
+         self._preprocess_data()
+
+         # Handling string type target column in classification
+         # by performing Ordinal Encoding
+         if not self.cluster:
+             if self.data_types[self.target_column] in ['str']:
+                 self._ordinal_encoding([self.target_column])
+
+         # Detecting categorical columns
+         categorical_columns = [col for col, d_type in self.data._column_names_and_types if d_type == 'str']
+
+         # Detecting and removing futile columns, if categorical columns exist
+         if len(categorical_columns) != 0:
+
+             obj = CategoricalSummary(data=self.data,
+                                      target_columns=categorical_columns,
+                                      volatile=self.volatile,
+                                      persist=self.persist)
+
+             gfc_out = GetFutileColumns(data=self.data,
+                                        object=obj,
+                                        category_summary_column="ColumnName",
+                                        threshold_value=0.7,
+                                        volatile=self.volatile,
+                                        persist=self.persist)
+
+             # Extracting futile columns
+             f_cols = [row[0] for row in gfc_out.result.itertuples()]
+
+             self.data_mapping['categorical_summary'] = obj.result._table_name
+             self.data_mapping['futile_columns'] = gfc_out.result._table_name
+
+             if len(f_cols) == 0:
+                 self._display_msg(inline_msg="Analysis indicates all categorical columns are significant. No action needed.",
+                                   progress_bar=self.progress_bar)
+             else:
+
+                 self.data = self.data.drop(f_cols, axis=1)
+                 # Storing futile column list in data transform dictionary
+                 self.data_transform_dict['futile_columns'] = f_cols
+
+                 if self.persist:
+                     table_name = UtilFuncs._generate_temp_table_name(table_type=TeradataConstants.TERADATA_TABLE,
+                                                                      gc_on_quit=False)
+                     self.data.to_sql(table_name)
+                 else:
+                     self.data.materialize()
+
+                 self.data_mapping['data_without_futile_columns'] = self.data._table_name
+                 self._display_msg(msg='Removing Futile columns:',
+                                   col_lst=f_cols,
+                                   progress_bar=self.progress_bar)
+                 self._display_msg(msg='Sample of Data after removing Futile columns:',
+                                   data=self.data,
+                                   progress_bar=self.progress_bar)
+         end_time = time.time()
+         self._display_msg(msg="Total time to handle less significant features: {:.2f} sec ".format(end_time - start_time),
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+
+     def _fetch_date_component(self):
+         """
+         DESCRIPTION:
+             Function to fetch the day of week, week of month, month of quarter and
+             quarter of year components from date columns. Generates weekend and
+             month-half details from the day of week and week of month components
+             respectively. Converts the quarter of year and month of quarter
+             component columns to VARCHAR.
+
+         PARAMETERS:
+             None
+
+         RETURNS:
+             list of newly generated date component features.
+
+         RAISES:
+             None
+
+         EXAMPLES:
+             >>> new_features = self._fetch_date_component()
+         """
+         # List for storing newly generated date component features
+         new_date_components = []
+         # Extracting weekend, month and quarter details from date columns
+         date_component_param = {}
+         for col in self.date_column_list:
+             # Generating new column names for extracted date components
+             weekend_col = f'{col}_weekend'
+             month_half_col = f'{col}_month_half'
+             month_of_quarter_col = f'{col}_month_of_quarter'
+             quarter_of_year_col = f'{col}_quarter_of_year'
+
+             date_component_param = {
+                 **date_component_param,
+                 weekend_col: case([(self.data[col].day_of_week().isin([1, 7]), 'yes')], else_='no'),
+                 month_half_col: case([(self.data[col].week_of_month().isin([1, 2]), 'first_half')], else_='second_half'),
+                 month_of_quarter_col: self.data[col].month_of_quarter(),
+                 quarter_of_year_col: self.data[col].quarter_of_year()
+             }
+             # Storing the newly generated date component columns. Raw day of week
+             # and week of month values are not kept as columns; they are only
+             # used above to derive the weekend and month-half details.
+             new_date_components.extend([weekend_col, month_half_col, month_of_quarter_col, quarter_of_year_col])
+         # Adding new date component columns to dataset
+         self.data = self.data.assign(**date_component_param)
+         # Dropping date columns as the different component columns are extracted.
+         self.data = self.data.drop(self.date_column_list, axis=1)
+
+         # Converting remaining component columns to VARCHAR
+         # so that they are treated as categorical columns.
+         remaining_component_columns = [col for col in self.data.columns if re.search('(month_of_quarter|quarter_of_year)$', col)]
+         accumulate_columns = self._extract_list(self.data.columns, remaining_component_columns)
+         convertto_params = {
+             "data": self.data,
+             "target_columns": remaining_component_columns,
+             "target_datatype": ["VARCHAR(charlen=20,charset=UNICODE,casespecific=NO)"],
+             "accumulate": accumulate_columns,
+             "persist": True
+         }
+         # Disabling display table name as persist is True by default
+         if not self.volatile and not self.persist:
+             convertto_params["display_table_name"] = False
+
+         # Setting persist to False if volatile is True
+         if self.volatile:
+             convertto_params["persist"] = False
+             convertto_params["volatile"] = True
+
+         # Updating dataset with the converted component columns
+         self.data = ConvertTo(**convertto_params).result
+
+         # If volatile is False and persist is False
+         if not self.volatile and not self.persist:
+             # Adding table containing transformed data to garbage collector
+             GarbageCollector._add_to_garbagecollector(self.data._table_name)
+         return new_date_components
+
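+     # For a date column named 'order_date' (hypothetical), _fetch_date_component
+     # would add order_date_weekend, order_date_month_half,
+     # order_date_month_of_quarter and order_date_quarter_of_year, the last two
+     # cast to VARCHAR so that they are treated as categorical features.
+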
+     def _handle_date_columns_helper(self):
+         """
+         DESCRIPTION:
+             Function for dropping irrelevant date features. Performs extraction of
+             the different components from relevant date features and transforms them.
+
+         PARAMETERS:
+             None
+
+         RETURNS:
+             None
+
+         RAISES:
+             None
+
+         EXAMPLES:
+             >>> self._handle_date_columns_helper()
+         """
+
+         # Dropping missing values for all date columns
+         self._display_msg(msg="Dropping missing values for:",
+                           col_lst=self.date_column_list,
+                           progress_bar=self.progress_bar)
+
+         self.data = self.data.dropna(subset=self.date_column_list)
+
+         # Date columns eligible for dropping from dataset
+         drop_date_cols = []
+
+         # Checking for unique valued date columns
+         for col in self.date_column_list:
+             if self.data.drop_duplicate(col).size == self.data.shape[0]:
+                 drop_date_cols.append(col)
+
+         if len(drop_date_cols) != 0:
+             self.data = self.data.drop(drop_date_cols, axis=1)
+             # Storing unique date column list in data transform dictionary
+             self.data_transform_dict['drop_unique_date_columns'] = drop_date_cols
+             self._display_msg(msg='Dropping date features with all unique values:',
+                               col_lst=drop_date_cols,
+                               progress_bar=self.progress_bar)
+             # Updated date column list after dropping irrelevant date columns
+             self.date_column_list = [item for item in self.date_column_list if item not in drop_date_cols]
+
+         if len(self.date_column_list) != 0:
+
+             # List for storing newly generated date component features
+             new_columns = self._fetch_date_component()
+             self._display_msg(msg='List of newly generated features from existing date features:',
+                               col_lst=new_columns,
+                               progress_bar=self.progress_bar)
+             # Dropping columns with all unique values or a single value
+             drop_cols = []
+             for col in new_columns:
+                 distinct_rows = self.data.drop_duplicate(col).size
+                 if distinct_rows == self.data.shape[0]:
+                     drop_cols.append(col)
+                     self._display_msg(msg='Dropping features with all unique values:',
+                                       col_lst=col,
+                                       progress_bar=self.progress_bar)
+
+                 elif distinct_rows == 1:
+                     drop_cols.append(col)
+                     self._display_msg(msg='Dropping features with single value:',
+                                       col_lst=col,
+                                       progress_bar=self.progress_bar)
+
+             # Dropping columns from drop_cols list
+             if len(drop_cols) != 0:
+                 self.data = self.data.drop(drop_cols, axis=1)
+                 # Storing extracted date component list for drop in data transform dictionary
+                 self.data_transform_dict['drop_extract_date_columns'] = drop_cols
+                 # Extracting all newly generated columns
+                 new_columns = [item for item in new_columns if item not in drop_cols]
+
+             self._display_msg(msg='Updated list of newly generated features from existing date features:',
+                               col_lst=new_columns,
+                               progress_bar=self.progress_bar)
+
+             self._display_msg(msg='Updated dataset sample after handling date features:',
+                               data=self.data,
+                               progress_bar=self.progress_bar)
+         else:
+             self._display_msg(inline_msg="No useful date feature found.",
+                               progress_bar=self.progress_bar)
+
+     def _handle_date_columns(self):
+         """
+         DESCRIPTION:
+             Function to handle date columns in the dataset, if any.
+             Performs the relevant transformation by extracting the different date
+             components, i.e., weekend, month half, month of quarter and quarter of year.
+
+         PARAMETERS:
+             None
+
+         RETURNS:
+             None
+
+         RAISES:
+             None
+
+         EXAMPLES:
+             >>> self._handle_date_columns()
+         """
+         self._display_msg(msg="Handling Date Features ...",
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+         start_time = time.time()
+
+         self.date_column_list = [col for col, d_type in self.data._column_names_and_types \
+                                  if d_type in ["datetime.date", "datetime.datetime"]]
+
+         if len(self.date_column_list) == 0:
+             self._display_msg(inline_msg="Analysis Completed. Dataset does not contain any feature related to dates. No action needed.",
+                               progress_bar=self.progress_bar)
+         else:
+             # Storing date column list in data transform dictionary
+             self.data_transform_dict['date_columns'] = self.date_column_list
+             self._handle_date_columns_helper()
+             if self.persist:
+                 table_name = UtilFuncs._generate_temp_table_name(table_type=TeradataConstants.TERADATA_TABLE,
+                                                                  gc_on_quit=False)
+                 self.data.to_sql(table_name)
+             else:
+                 self.data.materialize()
+             self.data_mapping['data_after_date_handling'] = self.data._table_name
+
+         end_time = time.time()
+         self._display_msg(msg="Total time to handle date features: {:.2f} sec".format(end_time - start_time),
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+
+     def _missing_count_per_column(self):
+         """
+         DESCRIPTION:
+             Function finds and returns a dictionary of the columns
+             with missing values and their missing value counts.
+
+         PARAMETERS:
+             None
+
+         RETURNS:
+             dict, keys represent column names and
+             values represent the missing value count for the corresponding column.
+
+         RAISES:
+             None
+
+         EXAMPLES:
+             >>> missing_cols = self._missing_count_per_column()
+         """
+
+         # Removing rows with missing target column value
+         if not self.cluster:
+             self.data = self.data.dropna(subset=[self.target_column])
+
+         params = {
+             "data": self.data,
+             "target_columns": self.data.columns,
+             "persist": True,
+             "display_table_name": False
+         }
+
+         obj = ColumnSummary(**params)
+
+         # Adding column summary result table to garbage collector
+         GarbageCollector._add_to_garbagecollector(obj.result._table_name)
+
+         cols_miss_val = {}
+         # Iterating over each row in the column summary result
+         for row in obj.result.itertuples():
+             # Checking if the missing values count of the row (row[3]) is greater than 0
+             if row[3] > 0:
+                 # If so, add an entry to the 'cols_miss_val' dictionary
+                 # Key: column name (row[0])
+                 # Value: count of missing values in the column (row[3])
+                 cols_miss_val[row[0]] = row[3]
+
+         return cols_miss_val
+
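+     # The mapping returned above has column names as keys and missing-row
+     # counts as values, e.g. {'age': 12, 'city': 45} (illustrative values).
+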
+     def _handling_missing_value(self):
+         """
+         DESCRIPTION:
+             Function detects the missing values in each feature of the dataset,
+             then performs these operations based on condition:
+                 1. deleting rows with missing values from columns/features
+                 2. dropping columns from the dataset
+
+         PARAMETERS:
+             None
+
+         RETURNS:
+             None
+
+         RAISES:
+             None
+
+         EXAMPLES:
+             >>> self._handling_missing_value()
+         """
+         self._display_msg(msg="Checking Missing values in dataset ...",
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+         start_time = time.time()
+
+         # Flag for missing values
+         msg_val_found = 0
+
+         # Number of rows
+         d_size = self.data.shape[0]
+
+         delete_rows = []
+         drop_cols = []
+         self.imputation_cols = {}
+
+         cols_miss_val = self._missing_count_per_column()
+
+         if len(cols_miss_val) != 0:
+             self._display_msg(msg="Columns with their missing values:",
+                               col_lst=cols_miss_val,
+                               progress_bar=self.progress_bar)
+
+             # Get distinct value count of each column
+             self._get_distinct_count()
+
+             # Iterating over columns with missing values
+             for col, val in cols_miss_val.items():
+
+                 # Drop column, if count of missing values > 60%
+                 if val > .6*d_size:
+                     drop_cols.append(col)
+                     continue
+
+                 # For clustering tasks, all columns with missing values are sent directly to imputation
+                 if self.cluster:
+                     self.imputation_cols[col] = val
+                     continue
+
+                 if self.data_types[col] in ['float', 'int']:
+                     corr_df = self.data[col].corr(self.data[self.target_column])
+                     corr_val = self.data.assign(True, corr_=corr_df)
+                     related = next(corr_val.itertuples())[0]
+
+                     # Delete rows, if count of missing values < 2% and
+                     # correlation between target column and numeric column <= .25
+                     if val < .02*d_size and related <= .25:
+                         delete_rows.append(col)
+                         continue
+
+                 elif self.data_types[col] in ['str']:
+                     # Delete rows, if count of missing values < 4%
+                     if val < .04*d_size:
+                         delete_rows.append(col)
+                         continue
+                     # Drop column, if unique count of column > 75%
+                     elif self.counts_dict[f'count_{col}'] > .75*(d_size-val):
+                         drop_cols.append(col)
+                         continue
+
+                 # Remaining column goes for imputation
+                 self.imputation_cols[col] = val
+             # Storing columns with missing values for imputation in data transform dictionary
+             self.data_transform_dict['imputation_columns'] = self.imputation_cols
+
+         if len(delete_rows) != 0:
+             rows = self.data.shape[0]
+             self.data = self.data.dropna(subset=delete_rows)
+             msg_val_found = 1
+             self._display_msg(msg='Deleting rows of these columns for handling missing values:',
+                               col_lst=delete_rows,
+                               progress_bar=self.progress_bar)
+             self._display_msg(msg=f'Sample of dataset after removing {rows-self.data.shape[0]} rows:',
+                               data=self.data,
+                               progress_bar=self.progress_bar)
+
+         if len(drop_cols) != 0:
+             self.data = self.data.drop(drop_cols, axis=1)
+             msg_val_found = 1
+             # Storing columns with missing values for drop in data transform dictionary
+             self.data_transform_dict['drop_missing_columns'] = drop_cols
+             self._display_msg(msg='Dropping these columns for handling missing values:',
+                               col_lst=drop_cols,
+                               progress_bar=self.progress_bar)
+             self._display_msg(msg=f'Sample of dataset after removing {len(drop_cols)} columns:',
+                               data=self.data,
+                               progress_bar=self.progress_bar)
+
+         if len(self.imputation_cols) == 0 and msg_val_found == 0:
+             self._display_msg(inline_msg="Analysis Completed. No Missing Values Detected.",
+                               progress_bar=self.progress_bar)
+
+         end_time = time.time()
+         self._display_msg(msg="Total time to find missing values in data: {:.2f} sec ".format(end_time - start_time),
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+
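+     # Worked example of the thresholds above, for d_size = 1000 rows: a column
+     # with 700 missing values (> 60%) is dropped; a numeric column with 15
+     # missing values (< 2%) and target correlation <= .25 has those rows
+     # deleted; a string column with 30 missing values (< 4%) has those rows
+     # deleted; every other column with missing values goes to imputation.
+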
+     def _impute_helper(self):
+         """
+         DESCRIPTION:
+             Function decides the imputation method [mean/median/mode] for columns with
+             missing values on the basis of the skewness of each column in the dataset.
+
+         PARAMETERS:
+             None
+
+         RETURNS:
+             A tuple containing,
+                 col_stat (name of columns with missing value)
+                 stat (imputation method for respective columns)
+
+         RAISES:
+             None
+
+         EXAMPLES:
+             >>> col_stat, stat = self._impute_helper()
+         """
+         col_stat = []
+         stat = []
+
+         # Converting output of skew() into a dictionary with key as column name and value as skewness value
+         df = self.data.skew()
+         skew_data = next(df.itertuples())._asdict()
+
+         # Iterating over columns with missing values
+         for key, val in self.imputation_cols.items():
+
+             col_stat.append(key)
+             if self.data_types[key] in ['float', 'int', 'decimal.Decimal']:
+                 val = skew_data[f'skew_{key}']
+                 # Median imputation method, if abs(skewness value) > 1
+                 if abs(val) > 1:
+                     stat.append('median')
+                 # Mean imputation method, if abs(skewness value) <= 1
+                 else:
+                     stat.append('mean')
+             # Mode imputation method, if categorical column
+             elif self.data_types[key] in ['str']:
+                 stat.append('mode')
+
+         self._display_msg(msg="Columns with their imputation method:",
+                           col_lst=dict(zip(col_stat, stat)),
+                           progress_bar=self.progress_bar)
+
+         return col_stat, stat
+
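+     # Sketch of the skew rule above with illustrative values: a numeric column
+     # with skew 2.3 gets 'median' (robust to the long tail), one with skew 0.4
+     # gets 'mean', and a string column always gets 'mode'.
+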
+     def _impute_missing_value(self):
+         """
+         DESCRIPTION:
+             Function performs the imputation on columns/features with missing values in the dataset.
+
+         PARAMETERS:
+             None
+
+         RETURNS:
+             None
+
+         RAISES:
+             None
+
+         EXAMPLES:
+             >>> self._impute_missing_value()
+         """
+
+         start_time = time.time()
+         self._display_msg(msg="Imputing Missing Values ...",
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+
+         if len(self.imputation_cols) != 0:
+
+             # List of columns and imputation method
+             col_stat, stat = self._impute_helper()
+
+             fit_obj = SimpleImputeFit(data=self.data,
+                                       stats_columns=col_stat,
+                                       stats=stat,
+                                       volatile=self.volatile,
+                                       persist=self.persist)
+
+             # Storing fit object for imputation in data transform dictionary
+             self.data_transform_dict['imputation_fit_object'] = fit_obj.output
+             sm = SimpleImputeTransform(data=self.data,
+                                        object=fit_obj,
+                                        volatile=self.volatile,
+                                        persist=self.persist)
+
+             self.data = sm.result
+             self.data_mapping['fit_simpleimpute_output'] = fit_obj.output_data._table_name
+             self.data_mapping['fit_simpleimpute_result'] = fit_obj.output._table_name
+             self.data_mapping['data_without_missing_values'] = self.data._table_name
+             self._display_msg(msg="Sample of dataset after Imputation:",
+                               data=self.data,
+                               progress_bar=self.progress_bar)
+         else:
+             self._display_msg(inline_msg="Analysis completed. No imputation required.",
+                               progress_bar=self.progress_bar)
+
+         end_time = time.time()
+         self._display_msg(msg="Time taken to perform imputation: {:.2f} sec ".format(end_time - start_time),
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+
+     def _custom_handling_missing_value(self):
+         """
+         DESCRIPTION:
+             Function to perform customized missing value handling for features based on user input.
+
+         PARAMETERS:
+             None
+
+         RETURNS:
+             None
+
+         RAISES:
+             TeradataMlException
+
+         EXAMPLES:
+             >>> self._custom_handling_missing_value()
+         """
+         # Fetching user input for performing missing value handling
+         missing_handling_input = self.custom_data.get("MissingValueHandlingIndicator", False)
+
+         if missing_handling_input:
+             # Fetching parameters required for performing missing value handling
+             missing_handling_param = self.custom_data.get("MissingValueHandlingParam", None)
+             if missing_handling_param:
+                 # Fetching user input for the different missing value handling methods
+                 drop_col_ind = missing_handling_param.get("DroppingColumnIndicator", False)
+                 drop_row_ind = missing_handling_param.get("DroppingRowIndicator", False)
+                 impute_ind = missing_handling_param.get("ImputeMissingIndicator", False)
+                 volatile = missing_handling_param.pop("volatile", False)
+                 persist = missing_handling_param.pop("persist", False)
+                 # Checking whether all method indicators in the user input are false
+                 if not any([drop_col_ind, drop_row_ind, impute_ind]):
+                     self._display_msg(inline_msg="No method information provided for performing customized missing value handling. \
+                                       AutoML will proceed with default missing value handling method.",
+                                       progress_bar=self.progress_bar)
+
+                 else:
+                     # Checking user input for dropping columns with missing values
+                     if drop_col_ind:
+                         drop_col_list = missing_handling_param.get("DroppingColumnList", [])
+                         # Storing custom columns with missing values for drop in data transform dictionary
+                         self.data_transform_dict["custom_drop_missing_columns"] = drop_col_list
+                         if len(drop_col_list):
+                             # Checking whether the columns are present in the dataset
+                             _Validators._validate_dataframe_has_argument_columns(drop_col_list, "DroppingColumnList", self.data, "df")
+
+                             self._display_msg(msg="Dropping these columns for handling customized missing value:",
+                                               col_lst=drop_col_list,
+                                               progress_bar=self.progress_bar)
+                             self.data = self.data.drop(drop_col_list, axis=1)
+                         else:
+                             self._display_msg(inline_msg="No information provided for dropping missing value containing columns.",
+                                               progress_bar=self.progress_bar)
+
+                     # Checking user input for dropping rows with missing values
+                     if drop_row_ind:
+                         drop_row_list = missing_handling_param.get("DroppingRowList", [])
+                         if len(drop_row_list):
+                             # Checking whether the columns are present in the dataset
+                             _Validators._validate_dataframe_has_argument_columns(drop_row_list, "DroppingRowList", self.data, "df")
+
+                             self._display_msg(msg="Dropping missing rows in these columns for handling customized missing value:",
+                                               col_lst=drop_row_list,
+                                               progress_bar=self.progress_bar)
+                             self.data = self.data.dropna(subset=drop_row_list)
+                         else:
+                             self._display_msg(inline_msg="No information provided for dropping missing value containing rows.",
+                                               progress_bar=self.progress_bar)
+                     # Checking user input for missing value imputation
+                     if impute_ind:
+                         stat_list = missing_handling_param.get("StatImputeList", None)
+                         stat_method = missing_handling_param.get("StatImputeMethod", None)
+                         literal_list = missing_handling_param.get("LiteralImputeList", None)
+                         literal_value = missing_handling_param.get("LiteralImputeValue", None)
+
+                         # Checking whether the columns are present in the dataset
+                         _Validators._validate_dataframe_has_argument_columns(stat_list, "StatImputeList", self.data, "df")
+
+                         _Validators._validate_dataframe_has_argument_columns(literal_list, "LiteralImputeList", self.data, "df")
+
+                         # Creating fit params
+                         fit_param = {
+                             "data": self.data,
+                             "stats_columns": stat_list,
+                             "stats": stat_method,
+                             "literals_columns": literal_list,
+                             "literals": literal_value,
+                             "volatile": volatile,
+                             "persist": persist
+                         }
+                         # Fitting on dataset
+                         fit_obj = SimpleImputeFit(**fit_param)
+                         # Storing custom fit object for imputation in data transform dictionary
+                         self.data_transform_dict["custom_imputation_ind"] = True
+                         self.data_transform_dict["custom_imputation_fit_object"] = fit_obj.output
+                         # Creating transform params
+                         transform_param = {
+                             "data": self.data,
+                             "object": fit_obj.output,
+                             "persist": True
+                         }
+                         # Disabling display table name as persist is True by default
+                         if not volatile and not persist:
+                             transform_param["display_table_name"] = False
+
+                         if volatile:
+                             transform_param["volatile"] = True
+                             transform_param["persist"] = False
+                         # Updating dataset with transform result
+                         self.data = SimpleImputeTransform(**transform_param).result
+
+                         self.data_mapping['fit_simpleimpute_output'] = fit_obj.output_data._table_name
+                         self.data_mapping['fit_simpleimpute_result'] = fit_obj.output._table_name
+                         self.data_mapping['data_without_missing_values'] = self.data._table_name
+
+                         if not volatile and not persist:
+                             # Adding table containing transformed data to garbage collector
+                             GarbageCollector._add_to_garbagecollector(self.data._table_name)
+                         self._display_msg(msg="Updated dataset sample after performing customized missing value imputation:",
+                                           data=self.data,
+                                           progress_bar=self.progress_bar)
+             else:
+                 self._display_msg(inline_msg="No information provided for performing customized missing value handling. \
+                                   AutoML will proceed with default missing value handling method.",
+                                   progress_bar=self.progress_bar)
+         else:
+             self._display_msg(inline_msg="Proceeding with default option for missing value imputation.",
+                               progress_bar=self.progress_bar)
+
+         # Proceeding with default method for handling remaining missing values
+         self._display_msg(inline_msg="Proceeding with default option for handling remaining missing values.",
+                           progress_bar=self.progress_bar)
+         self._handling_missing_value()
+         self._impute_missing_value()
+
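+     # Hedged sketch of the "custom_data" shape consumed above, inferred only
+     # from the keys read in _custom_handling_missing_value (column names and
+     # values are illustrative):
+     # {
+     #     "MissingValueHandlingIndicator": true,
+     #     "MissingValueHandlingParam": {
+     #         "DroppingColumnIndicator": true,
+     #         "DroppingColumnList": ["col1"],
+     #         "DroppingRowIndicator": true,
+     #         "DroppingRowList": ["col2"],
+     #         "ImputeMissingIndicator": true,
+     #         "StatImputeList": ["col3"],
+     #         "StatImputeMethod": ["median"],
+     #         "LiteralImputeList": ["col4"],
+     #         "LiteralImputeValue": [0]
+     #     }
+     # }
+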
1160
+     def _bin_code_transformation(self):
+         """
+         DESCRIPTION:
+             Function to perform customized binning on features based on user input.
+
+         PARAMETERS:
+             None
+
+         RETURNS:
+             None
+
+         RAISES:
+             TeradataMlException
+
+         EXAMPLES:
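+             >>> # Hypothetical "BincodeParam" input, sketched from the parsing
+             >>> # logic below; column names and values are illustrative only.
+             >>> self.custom_data["BincodeParam"] = {
+             ...     "age": {"Type": "Equal-Width", "NumOfBins": 4},
+             ...     "fare": {"Type": "Variable-Width", "NumOfBins": 2,
+             ...              "Bin_1": {"min_value": 0, "max_value": 100, "label": "low"},
+             ...              "Bin_2": {"min_value": 101, "max_value": 500, "label": "high"}}
+             ... }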
+             >>> self._bin_code_transformation()
+         """
+         # Fetching user input for performing bin code transformation.
+         bin_code_input = self.custom_data.get("BincodeIndicator", False)
+
+         if bin_code_input:
+             # Storing custom bin code transformation indicator in data transform dictionary
+             self.data_transform_dict['custom_bincode_ind'] = True
+             # Fetching list required for performing transformation.
+             extracted_col = self.custom_data.get("BincodeParam", None)
+             if not extracted_col:
+                 self._display_msg(inline_msg="BincodeParam is empty. Skipping customized bincode transformation.",
+                                   progress_bar=self.progress_bar)
+             else:
+                 # Creating lists for storing column and binning information for performing transformation
+                 equal_width_bin_list = []
+                 equal_width_bin_columns = []
+                 var_width_bin_list = []
+                 var_width_bin_columns = []
+                 volatile = extracted_col.pop("volatile", False)
+                 persist = extracted_col.pop("persist", False)
+
+                 # Checking whether the columns are present in the dataset
+                 _Validators._validate_dataframe_has_argument_columns(list(extracted_col.keys()), "BincodeParam", self.data, "df")
+
+                 for col, transform_val in extracted_col.items():
+                     # Fetching type of binning to be performed
+                     bin_trans_type = transform_val["Type"]
+                     # Fetching number of bins to be created
+                     num_bin = transform_val["NumOfBins"]
+                     # Checking for bin types and adding details into lists for binning
+                     if bin_trans_type == "Equal-Width":
+                         bins = num_bin
+                         equal_width_bin_list.append(bins)
+                         equal_width_bin_columns.append(col)
+                     elif bin_trans_type == "Variable-Width":
+                         var_width_bin_columns.append(col)
+                         bins = num_bin
+                         for i in range(1, bins + 1):
+                             # Forming bin name as per expected input
+                             temp = "Bin_" + str(i)
+                             # Fetching required details for variable-width binning
+                             minval = transform_val[temp]["min_value"]
+                             maxval = transform_val[temp]["max_value"]
+                             label = transform_val[temp]["label"]
+                             # Appending information of each bin
+                             var_width_bin_list.append({"ColumnName": col, "MinValue": minval, "MaxValue": maxval, "Label": label})
+                 # Checking column list for performing binning with Equal-Width.
+                 if len(equal_width_bin_columns) != 0:
+                     # Adding fit parameters for performing binning with Equal-Width.
+                     # Passing the per-column bin counts collected above, rather than
+                     # only the last loop value of "bins".
+                     fit_params = {
+                         "data" : self.data,
+                         "target_columns": equal_width_bin_columns,
+                         "method_type" : "Equal-Width",
+                         "nbins" : equal_width_bin_list,
+                         "volatile" : volatile,
+                         "persist" : persist
+                     }
+                     eql_bin_code_fit = BincodeFit(**fit_params)
+                     # Storing fit object and column list for Equal-Width binning in data transform dictionary
+                     self.data_transform_dict['custom_eql_bincode_col'] = equal_width_bin_columns
+                     self.data_transform_dict['custom_eql_bincode_fit_object'] = eql_bin_code_fit.output
+                     # Extracting accumulate columns
+                     accumulate_columns = self._extract_list(self.data.columns, equal_width_bin_columns)
+                     # Adding transform parameters for performing binning with Equal-Width.
+                     eql_transform_params = {
+                         "data" : self.data,
+                         "object" : eql_bin_code_fit.output,
+                         "accumulate" : accumulate_columns,
+                         "persist" : True
+                     }
+                     # Disabling table name display, since persist is enabled only internally here
+                     if not volatile and not persist:
+                         eql_transform_params["display_table_name"] = False
+
+                     if volatile:
+                         eql_transform_params["volatile"] = True
+                         eql_transform_params["persist"] = False
+                     self.data = BincodeTransform(**eql_transform_params).result
+                     if not volatile and not persist:
+                         # Adding the table containing transformed data to the garbage collector
+                         GarbageCollector._add_to_garbagecollector(self.data._table_name)
+
+                     self.data_mapping['fit_eql_width'] = eql_bin_code_fit.output._table_name
+                     self.data_mapping['eql_width_bincoded_data'] = self.data._table_name
+
+                     self._display_msg(msg="Updated dataset sample after performing Equal-Width binning:",
+                                       data=self.data,
+                                       progress_bar=self.progress_bar)
+                 else:
+                     self._display_msg(inline_msg="No information provided for Equal-Width transformation.",
+                                       progress_bar=self.progress_bar)
+
+                 if len(var_width_bin_columns) != 0:
+                     # Creating a pandas DataFrame and then a teradataml DataFrame for storing binning information
+                     var_bin_table = pd.DataFrame(var_width_bin_list, columns=["ColumnName", "MinValue", "MaxValue", "Label"])
+                     self._display_msg(msg="Variable-Width binning information:",
+                                       data=var_bin_table,
+                                       progress_bar=self.progress_bar)
+                     copy_to_sql(df=var_bin_table, table_name="automl_bincode_var_fit", temporary=True)
+                     var_fit_input = DataFrame.from_table("automl_bincode_var_fit")
+                     fit_params = {
+                         "data" : self.data,
+                         "fit_data": var_fit_input,
+                         "fit_data_order_column" : ["MinValue", "MaxValue"],
+                         "target_columns": var_width_bin_columns,
+                         "minvalue_column" : "MinValue",
+                         "maxvalue_column" : "MaxValue",
+                         "label_column" : "Label",
+                         "method_type" : "Variable-Width",
+                         "label_prefix" : "label_prefix",
+                         "volatile" : volatile,
+                         "persist" : persist
+                     }
+                     var_bin_code_fit = BincodeFit(**fit_params)
+                     # Storing fit object and column list for Variable-Width binning in data transform dictionary
+                     self.data_transform_dict['custom_var_bincode_col'] = var_width_bin_columns
+                     self.data_transform_dict['custom_var_bincode_fit_object'] = var_bin_code_fit.output
+                     accumulate_columns = self._extract_list(self.data.columns, var_width_bin_columns)
+                     var_transform_params = {
+                         "data" : self.data,
+                         "object" : var_bin_code_fit.output,
+                         "object_order_column" : "TD_MinValue_BINFIT",
+                         "accumulate" : accumulate_columns,
+                         "persist" : True
+                     }
+                     # Disabling table name display, since persist is enabled only internally here
+                     if not volatile and not persist:
+                         var_transform_params["display_table_name"] = False
+
+                     if volatile:
+                         var_transform_params["volatile"] = True
+                         var_transform_params["persist"] = False
+                     self.data = BincodeTransform(**var_transform_params).result
+                     self.data_mapping['fit_var_width'] = var_bin_code_fit.output._table_name
+                     self.data_mapping['var_width_bincoded_data'] = self.data._table_name
+                     if not volatile and not persist:
+                         # Adding the table containing transformed data to the garbage collector
+                         GarbageCollector._add_to_garbagecollector(self.data._table_name)
+                     self._display_msg(msg="Updated dataset sample after performing Variable-Width binning:",
+                                       data=self.data,
+                                       progress_bar=self.progress_bar)
+                 else:
+                     self._display_msg(inline_msg="No information provided for Variable-Width transformation.",
+                                       progress_bar=self.progress_bar)
+         else:
+             self._display_msg(inline_msg="Skipping customized bincode transformation.",
+                               progress_bar=self.progress_bar)
+
+     def _string_manipulation(self):
+         """
+         DESCRIPTION:
+             Function to perform customized string manipulations on categorical features based on user input.
+
+         PARAMETERS:
+             None
+
+         RETURNS:
+             None
+
+         RAISES:
+             TeradataMlException
+
+         EXAMPLES:
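+             >>> # Hypothetical "StringManipulationParam" input, sketched from
+             >>> # _str_method_mapping below; names and values are illustrative only.
+             >>> self.custom_data["StringManipulationParam"] = {
+             ...     "name": {"StringOperation": "upper"},
+             ...     "code": {"StringOperation": "Substring",
+             ...              "StartIndex": 1, "StringLength": 3}}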
+             >>> self._string_manipulation()
+         """
+         # Fetching user input for performing string manipulation.
+         str_mnpl_input = self.custom_data.get("StringManipulationIndicator", False)
+         # Checking user input for string manipulation on categorical features.
+         if str_mnpl_input:
+             # Storing custom string manipulation indicator in data transform dictionary
+             self.data_transform_dict['custom_string_manipulation_ind'] = True
+             # Fetching list required for performing operation.
+             # Guarding against a missing key before copying.
+             extracted_col = (self.custom_data.get("StringManipulationParam") or {}).copy()
+             if not extracted_col:
+                 self._display_msg(inline_msg="No information provided for performing string manipulation.",
+                                   progress_bar=self.progress_bar)
+             else:
+                 volatile = extracted_col.pop("volatile", False)
+                 persist = extracted_col.pop("persist", False)
+                 # Checking whether the columns are present in the dataset
+                 _Validators._validate_dataframe_has_argument_columns(list(extracted_col.keys()), "StringManipulationParam", self.data, "df")
+
+                 for target_col, transform_val in extracted_col.items():
+                     self.data = self._str_method_mapping(target_col, transform_val)
+                 # Storing custom string manipulation parameters in data transform dictionary
+                 self.data_transform_dict['custom_string_manipulation_param'] = extracted_col
+
+                 self._display_msg(msg="Updated dataset sample after performing string manipulation:",
+                                   data=self.data,
+                                   progress_bar=self.progress_bar)
+         else:
+             self._display_msg(inline_msg="Skipping customized string manipulation.",
+                               progress_bar=self.progress_bar)
+
+     def _str_method_mapping(self,
+                             target_col,
+                             transform_val):
+         """
+         DESCRIPTION:
+             Function to map customized parameters according to the passed method and
+             perform string manipulation on categorical features.
+
+         PARAMETERS:
+             target_col:
+                 Required Argument.
+                 Specifies the feature for applying string manipulation.
+                 Types: str
+
+             transform_val:
+                 Required Argument.
+                 Specifies the different parameters required for applying string manipulation.
+                 Types: dict
+
+         RETURNS:
+             DataFrame containing transformed data after applying string manipulation.
+
+         RAISES:
+             None
+
+         EXAMPLES:
+             >>> transform_val = {"StringOperation": "upper"}
+             >>> self._str_method_mapping(target_col="text_col", transform_val=transform_val)
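+             >>> # Operations such as "Substring" need extra keys, as mapped in
+             >>> # the branches below; this variant is illustrative only.
+             >>> self._str_method_mapping(target_col="text_col",
+             ...                          transform_val={"StringOperation": "Substring",
+             ...                                         "StartIndex": 1,
+             ...                                         "StringLength": 3})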
+ """
1399
+ # Creating list of features for accumulating while performing string manipulation on certain features
1400
+ accumulate_columns = self._extract_list(self.data.columns, [target_col])
1401
+
1402
+ # Fetching required parameters from json object
1403
+ string_operation = transform_val["StringOperation"]
1404
+
1405
+ # Setting volatile and persist parameters for performing string manipulation
1406
+ volatile, persist = self._get_generic_parameters(func_indicator="StringManipulationIndicator",
1407
+ param_name="StringManipulationParam")
1408
+
1409
+ # Storing general parameters for performing string transformation
1410
+ fit_params = {
1411
+ "data" : self.data,
1412
+ "target_columns" : target_col,
1413
+ "string_operation" : string_operation,
1414
+ "accumulate" : accumulate_columns,
1415
+ "inplace" : True,
1416
+ "persist" : True
1417
+ }
1418
+ # Disabling display table name if persist is True by default
1419
+ if not volatile and not persist:
1420
+ fit_params["display_table_name"] = False
1421
+
1422
+ if volatile:
1423
+ fit_params["volatile"] = True
1424
+ fit_params["persist"] = False
1425
+
1426
+ # Adding additional parameters based on string operation type
1427
+ if string_operation in ["StringCon", "StringTrim"]:
1428
+ string_argument = transform_val["String"]
1429
+ fit_params = {**fit_params,
1430
+ "string" : string_argument}
1431
+ elif string_operation == "StringPad":
1432
+ string_argument = transform_val["String"]
1433
+ string_length = transform_val["StringLength"]
1434
+ fit_params = {**fit_params,
1435
+ "string" : string_argument,
1436
+ "string_length" : string_length}
1437
+ elif string_operation == "Substring":
1438
+ string_index = transform_val["StartIndex"]
1439
+ string_length = transform_val["StringLength"]
1440
+ fit_params = {**fit_params,
1441
+ "start_index" : string_index,
1442
+ "string_length" : string_length}
1443
+
1444
+ # returning dataset after performing string manipulation
1445
+ transform_output = StrApply(**fit_params).result
1446
+ if not volatile and not persist:
1447
+ # Adding transformed data containing table to garbage collector
1448
+ GarbageCollector._add_to_garbagecollector(transform_output._table_name)
1449
+ self.data_mapping['string_manipulated_data'] = transform_output._table_name
1450
+ return transform_output
1451
+
1452
+     def _one_hot_encoding(self,
+                           one_hot_columns,
+                           unique_counts):
+         """
+         DESCRIPTION:
+             Function performs one hot encoding on categorical columns/features in the dataset.
+
+         PARAMETERS:
+             one_hot_columns:
+                 Required Argument.
+                 Specifies the categorical columns for which one hot encoding will be performed.
+                 Types: str or list of strings (str)
+
+             unique_counts:
+                 Required Argument.
+                 Specifies the unique value counts of the categorical columns.
+                 Types: int or list of integers (int)
+
+         RETURNS:
+             None
+
+         RAISES:
+             None
+
+         EXAMPLES:
+             >>> self._one_hot_encoding(one_hot_columns=["category1"], unique_counts=[5])
+         """
+         # The TD one hot encoding function adds an extra "<column>_other" column,
+         # so initializing this list to remove those extra columns later.
+         drop_lst = [ele + "_other" for ele in one_hot_columns]
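+         # For example (illustrative only), one_hot_columns=["color", "size"]
+         # yields drop_lst=["color_other", "size_other"], which are dropped from
+         # the transform output further below.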
+
+         # Setting volatile and persist parameters for performing encoding
+         volatile, persist = self._get_generic_parameters(func_indicator="CategoricalEncodingIndicator",
+                                                          param_name="CategoricalEncodingParam")
+
+         # Adding fit parameters for performing encoding
+         fit_params = {
+             "data" : self.data,
+             "approach" : "auto",
+             "is_input_dense" : True,
+             "target_column" : one_hot_columns,
+             "category_counts" : unique_counts,
+             "other_column" : "other",
+             "volatile" : volatile,
+             "persist" : persist
+         }
+         # Performing one hot encoding fit on target columns
+         fit_obj = OneHotEncodingFit(**fit_params)
+         # Storing indicator, fit object and column drop list for one hot encoding in data transform dictionary
+         self.data_transform_dict['one_hot_encoding_ind'] = True
+         self.data_transform_dict['one_hot_encoding_fit_obj'].update({self.one_hot_obj_count : fit_obj.result})
+         self.data_transform_dict['one_hot_encoding_drop_list'].extend(drop_lst)
+         self.one_hot_obj_count += 1
+         # Adding transform parameters for performing encoding
+         transform_params = {
+             "data" : self.data,
+             "object" : fit_obj.result,
+             "is_input_dense" : True,
+             "persist" : True
+         }
+         # Disabling table name display, since persist is enabled only internally here
+         if not volatile and not persist:
+             transform_params["display_table_name"] = False
+
+         # Setting persist to False if volatile is True
+         if volatile:
+             transform_params["volatile"] = True
+             transform_params["persist"] = False
+
+         # Performing one hot encoding transformation
+         transform_output = OneHotEncodingTransform(**transform_params).result
+
+         if not volatile and not persist:
+             # Adding the table containing transformed data to the garbage collector
+             GarbageCollector._add_to_garbagecollector(transform_output._table_name)
+         self.data = transform_output.drop(drop_lst, axis=1)
+         self.data.materialize()
+         self.data_mapping['one_hot_encoded_data'] = transform_output._table_name
+         self.data_mapping['fit_ohe_result'] = fit_obj.result._table_name
+
+     def _ordinal_encoding(self,
+                           ordinal_columns):
+         """
+         DESCRIPTION:
+             Function performs ordinal encoding on categorical columns or features in the dataset.
+
+         PARAMETERS:
+             ordinal_columns:
+                 Required Argument.
+                 Specifies the categorical columns for which ordinal encoding will be performed.
+                 Types: str or list of strings (str)
+
+         RETURNS:
+             None
+
+         RAISES:
+             None
+
+         EXAMPLES:
+             >>> self._ordinal_encoding(ordinal_columns=["category1", "category2"])
+         """
+         # Setting volatile and persist parameters for performing encoding
+         volatile, persist = self._get_generic_parameters(func_indicator="CategoricalEncodingIndicator",
+                                                          param_name="CategoricalEncodingParam")
+
+         # Adding fit parameters for performing encoding
+         fit_params = {
+             "data" : self.data,
+             "target_column" : ordinal_columns,
+             "volatile" : volatile,
+             "persist" : persist
+         }
+         # Performing ordinal encoding fit on target columns
+         ord_fit_obj = OrdinalEncodingFit(**fit_params)
+         # Storing fit object and column list for ordinal encoding in data transform dictionary
+         if ordinal_columns[0] != self.target_column:
+             self.data_transform_dict["custom_ord_encoding_fit_obj"] = ord_fit_obj.result
+             self.data_transform_dict['custom_ord_encoding_col'] = ordinal_columns
+         else:
+             self.data_transform_dict['target_col_encode_ind'] = True
+             self.data_transform_dict['target_col_ord_encoding_fit_obj'] = ord_fit_obj.result
+         # Extracting accumulate columns
+         accumulate_columns = self._extract_list(self.data.columns, ordinal_columns)
+         # Adding transform parameters for performing encoding
+         transform_params = {
+             "data" : self.data,
+             "object" : ord_fit_obj.result,
+             "accumulate" : accumulate_columns,
+             "persist" : True
+         }
+         # Disabling table name display, since persist is enabled only internally here
+         if not volatile and not persist:
+             transform_params["display_table_name"] = False
+
+         # Setting persist to False if volatile is True
+         if volatile:
+             transform_params["volatile"] = True
+             transform_params["persist"] = False
+         # Performing ordinal encoding transformation
+         self.data = OrdinalEncodingTransform(**transform_params).result
+
+         if not volatile and not persist:
+             # Adding the table containing transformed data to the garbage collector
+             GarbageCollector._add_to_garbagecollector(self.data._table_name)
+
+         self.data_mapping['fit_ordinal_output'] = ord_fit_obj.output_data._table_name
+         self.data_mapping['fit_ordinal_result'] = ord_fit_obj.result._table_name
+         self.data_mapping['ordinal_encoded_data'] = self.data._table_name
+
+         if len(ordinal_columns) == 1 and ordinal_columns[0] == self.target_column:
+             self.target_label = ord_fit_obj
+
+     def _target_encoding(self,
+                          target_encoding_list):
+         """
+         DESCRIPTION:
+             Function performs target encoding on categorical columns/features in the dataset.
+
+         PARAMETERS:
+             target_encoding_list:
+                 Required Argument.
+                 Specifies the categorical columns, and the corresponding parameters, with which target encoding will be performed.
+                 Types: dict
+
+         RETURNS:
+             None
+
+         RAISES:
+             TeradataMlException
+
+         EXAMPLES:
+             >>> target_dict = {"category": {"encoder_method": "mean", "response_column": "target"}}
+             >>> self._target_encoding(target_encoding_list=target_dict)
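+             >>> # The "CBM_DIRICHLET" method additionally needs
+             >>> # "num_distinct_responses", as read in the branch below;
+             >>> # this variant is illustrative only.
+             >>> target_dict = {"category": {"encoder_method": "CBM_DIRICHLET",
+             ...                             "response_column": "target",
+             ...                             "num_distinct_responses": 3}}
+             >>> self._target_encoding(target_encoding_list=target_dict)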
+ """
1626
+ # Fetching all columns on which target encoding will be performed.
1627
+ target_columns = list(target_encoding_list.keys())
1628
+ # Checking for column present in dataset or not
1629
+ _Validators._validate_dataframe_has_argument_columns(target_columns, "TargetEncodingList", self.data, "df")
1630
+ # Finding distinct values and counts for columns.
1631
+ cat_sum = CategoricalSummary(data=self.data,
1632
+ target_columns=target_columns)
1633
+ category_data = cat_sum.result.groupby("ColumnName").count()
1634
+ category_data = category_data.assign(drop_columns=True,
1635
+ ColumnName=category_data.ColumnName,
1636
+ CategoryCount=category_data.count_DistinctValue)
1637
+ # Storing indicator and fit object for target encoding in data transform dictionary
1638
+ self.data_transform_dict["custom_target_encoding_ind"] = True
1639
+ self.data_transform_dict["custom_target_encoding_fit_obj"] = {}
1640
+
1641
+ # Setting volatile and persist parameters for performing encoding
1642
+ volatile, persist = self._get_generic_parameters(func_indicator="CategoricalEncodingIndicator",
1643
+ param_name="CategoricalEncodingParam")
1644
+
1645
+ # Fetching required argument for performing target encoding
1646
+ for col,transform_val in target_encoding_list.items():
1647
+ encoder_method = transform_val["encoder_method"]
1648
+ response_column = transform_val["response_column"]
1649
+ # Adding fit parameters for performing encoding
1650
+ fit_params = {
1651
+ "data" : self.data,
1652
+ "category_data" : category_data,
1653
+ "encoder_method" : encoder_method,
1654
+ "target_columns" : col,
1655
+ "response_column" : response_column,
1656
+ "default_values": -1,
1657
+ "volatile" : volatile,
1658
+ "persist" : persist
1659
+ }
1660
+ if encoder_method == "CBM_DIRICHLET":
1661
+ num_distinct_responses=transform_val["num_distinct_responses"]
1662
+ fit_params = {**fit_params,
1663
+ "num_distinct_responses" : num_distinct_responses}
1664
+ # Performing target encoding fit on target columns
1665
+ tar_fit_obj = TargetEncodingFit(**fit_params)
1666
+ # Storing each column fit object for target encoding in data transform dictionary
1667
+ self.data_transform_dict["custom_target_encoding_fit_obj"].update({col : tar_fit_obj.result})
1668
+ # Extracting accumulate columns
1669
+ accumulate_columns = self._extract_list(self.data.columns, [col])
1670
+ # Adding transform parameters for performing encoding
1671
+ transform_params = {
1672
+ "data" : self.data,
1673
+ "object" : tar_fit_obj,
1674
+ "accumulate" : accumulate_columns,
1675
+ "persist" : True
1676
+ }
1677
+
1678
+ # Disabling display table name if persist is True by default
1679
+ if not volatile and not persist:
1680
+ transform_params["display_table_name"] = False
1681
+
1682
+ if volatile:
1683
+ transform_params["volatile"] = True
1684
+ transform_params["persist"] = False
1685
+ # Performing ordinal encoding transformation
1686
+ self.data = TargetEncodingTransform(**transform_params).result
1687
+ if not volatile and not persist:
1688
+ # Adding transformed data containing table to garbage collector
1689
+ GarbageCollector._add_to_garbagecollector(self.data._table_name)
1690
+ self.data_mapping[f'fit_{col}_target_output'] = tar_fit_obj.output_data._table_name
1691
+ self.data_mapping[f'fit_{col}_target_result'] = tar_fit_obj.result._table_name
1692
+ self.data_mapping[f'{col}_target_encoded_data'] = self.data._table_name
1693
+
1694
+     def _encoding_categorical_columns(self):
+         """
+         DESCRIPTION:
+             Function detects categorical columns and performs encoding on them in the dataset.
+
+         PARAMETERS:
+             None
+
+         RETURNS:
+             None
+
+         RAISES:
+             None
+
+         EXAMPLES:
+             >>> self._encoding_categorical_columns()
+         """
+         self._display_msg(msg="Performing encoding for categorical columns ...",
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+         start_time = time.time()
+
+         ohe_col = []
+         unique_count = []
+
+         # List of columns before one hot encoding
+         col_bf_ohe = self.data.columns
+
+         # Getting the distinct value count of each column
+         self._get_distinct_count()
+
+         # Detecting categorical columns with their unique counts
+         for col, d_type in self.data._column_names_and_types:
+             if d_type in ['str']:
+                 ohe_col.append(col)
+                 unique_count.append(self.counts_dict[f'count_{col}'])
+
+         if len(ohe_col) != 0:
+             self._one_hot_encoding(ohe_col, unique_count)
+
+             self._display_msg(msg="ONE HOT Encoding these Columns:",
+                               col_lst=ohe_col,
+                               progress_bar=self.progress_bar)
+             self._display_msg(msg="Sample of dataset after performing one hot encoding:",
+                               data=self.data,
+                               progress_bar=self.progress_bar)
+         else:
+             self._display_msg(inline_msg="Analysis completed. No categorical columns were found.",
+                               progress_bar=self.progress_bar)
+
+         # List of columns after one hot encoding
+         col_af_ohe = self.data.columns
+
+         # List of columns excluded from outlier processing and scaling
+         self.excluded_cols = self._extract_list(col_af_ohe, col_bf_ohe)
+
+         end_time = time.time()
+         self._display_msg(msg="Time taken to encode the columns: {:.2f} sec".format(end_time - start_time),
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+
+     def _custom_categorical_encoding(self):
+         """
+         DESCRIPTION:
+             Function to perform specific encoding on the categorical columns based on user input.
+             If validation fails, default encoding is performed on all remaining categorical columns.
+
+         PARAMETERS:
+             None
+
+         RETURNS:
+             None
+
+         RAISES:
+             TeradataMlException
+
+         EXAMPLES:
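+             >>> # Hypothetical "CategoricalEncodingParam" input, sketched from
+             >>> # the parsing below; names and values are illustrative only.
+             >>> self.custom_data["CategoricalEncodingParam"] = {
+             ...     "OneHotEncodingIndicator": True,
+             ...     "OneHotEncodingList": ["color"],
+             ...     "OrdinalEncodingIndicator": True,
+             ...     "OrdinalEncodingList": ["size"]}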
+             >>> self._custom_categorical_encoding()
+         """
+         self._display_msg(msg="Starting Customized Categorical Feature Encoding ...",
+                           progress_bar=self.progress_bar)
+         cat_enc_input = self.custom_data.get("CategoricalEncodingIndicator", False)
+         # Checking user input for categorical encoding
+         if cat_enc_input:
+             # Storing custom categorical encoding indicator in data transform dictionary
+             self.data_transform_dict["custom_categorical_encoding_ind"] = True
+             # Fetching user input list for performing encoding.
+             # Guarding against a missing key before copying.
+             encoding_list = (self.custom_data.get("CategoricalEncodingParam") or {}).copy()
+             if encoding_list:
+                 volatile = encoding_list.pop("volatile", False)
+                 persist = encoding_list.pop("persist", False)
+                 onehot_encode_ind = encoding_list.get("OneHotEncodingIndicator", False)
+                 ordinal_encode_ind = encoding_list.get("OrdinalEncodingIndicator", False)
+                 target_encode_ind = encoding_list.get("TargetEncodingIndicator", False)
+                 # Checking whether any categorical encoding technique indicator is set
+                 if not any([onehot_encode_ind, ordinal_encode_ind, target_encode_ind]):
+                     self._display_msg(inline_msg="No information provided for any type of customized categorical encoding techniques. AutoML will proceed with the default encoding technique.",
+                                       progress_bar=self.progress_bar)
+                 else:
+                     if onehot_encode_ind:
+                         unique_count = []
+                         ohe_list = encoding_list.get("OneHotEncodingList", None)
+                         # Checking for empty list
+                         if not ohe_list:
+                             self._display_msg(inline_msg="No information provided for customized one hot encoding technique.",
+                                               progress_bar=self.progress_bar)
+                         else:
+                             # Checking whether the columns are present in the dataset
+                             _Validators._validate_dataframe_has_argument_columns(ohe_list, "OneHotEncodingList", self.data, "df")
+
+                             # Keeping track of existing columns before applying one hot encoding
+                             col_bf_ohe = self.data.columns
+                             # Detecting categorical columns with their unique counts
+                             for col in ohe_list:
+                                 unique_count.append(self.data.drop_duplicate(col).size)
+                             # Performing one hot encoding
+                             self._one_hot_encoding(ohe_list, unique_count)
+                             # Keeping track of new columns after applying one hot encoding
+                             col_af_ohe = self.data.columns
+                             # Fetching list of columns on which outlier processing should not be applied
+                             self.excluded_cols.extend(self._extract_list(col_af_ohe, col_bf_ohe))
+
+                             self._display_msg(msg="Updated dataset sample after performing one hot encoding:",
+                                               data=self.data,
+                                               progress_bar=self.progress_bar)
+
+                     if ordinal_encode_ind:
+                         ord_list = encoding_list.get("OrdinalEncodingList", None)
+                         # Checking for empty list
+                         if not ord_list:
+                             self._display_msg(inline_msg="No information provided for customized ordinal encoding technique.",
+                                               progress_bar=self.progress_bar)
+                         else:
+                             # Checking whether the columns are present in the dataset
+                             _Validators._validate_dataframe_has_argument_columns(ord_list, "OrdinalEncodingList", self.data, "df")
+
+                             # Performing ordinal encoding
+                             self._ordinal_encoding(ord_list)
+                             self._display_msg(msg="Updated dataset sample after performing ordinal encoding:",
+                                               data=self.data,
+                                               progress_bar=self.progress_bar)
+
+                     if target_encode_ind:
+                         if self.cluster:
+                             self._display_msg(inline_msg="Target Encoding is not applicable for clustering. Skipping it.",
+                                               progress_bar=self.progress_bar)
+                         else:
+                             tar_list = encoding_list.get("TargetEncodingList", None)
+                             if not tar_list:
+                                 self._display_msg(inline_msg="No information provided for customized target encoding technique.",
+                                                   progress_bar=self.progress_bar)
+                             else:
+                                 # Performing target encoding
+                                 self._target_encoding(tar_list)
+                                 self._display_msg(msg="Updated dataset sample after performing target encoding:",
+                                                   data=self.data,
+                                                   progress_bar=self.progress_bar)
+             else:
+                 self._display_msg(inline_msg="No input provided for performing customized categorical encoding. AutoML will proceed with the default encoding technique.",
+                                   progress_bar=self.progress_bar)
+         else:
+             self._display_msg(inline_msg="AutoML will proceed with the default encoding technique.",
+                               progress_bar=self.progress_bar)
+
+         # Performing default encoding on remaining categorical columns
+         self._encoding_categorical_columns()
+
+     def _numapply_transformation(self, target_col, transform_val):
+         """
+         DESCRIPTION:
+             Function to perform different numerical transformations using NumApply on numerical features based on user input.
+
+         PARAMETERS:
+             target_col:
+                 Required Argument.
+                 Specifies the numerical column for which transformation will be performed.
+                 Types: str
+
+             transform_val:
+                 Required Argument.
+                 Specifies the different parameters required for applying numerical transformation.
+                 Types: dict
+
+         RETURNS:
+             NumApply result object containing the transformed data.
+
+         RAISES:
+             None
+
+         EXAMPLES:
+             >>> transform_val = {"apply_method": "sqrt"}
+             >>> result = self._numapply_transformation(target_col="numeric_col", transform_val=transform_val)
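+             >>> # The "sigmoid" method additionally needs "sigmoid_style", as
+             >>> # read in the branch below; this variant is illustrative only.
+             >>> result = self._numapply_transformation(
+             ...     target_col="numeric_col",
+             ...     transform_val={"apply_method": "sigmoid", "sigmoid_style": "logit"})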
+ """
1887
+ # Fetching columns for accumulation
1888
+ accumulate_columns = self._extract_list(self.data.columns, [target_col])
1889
+ apply_method = transform_val["apply_method"]
1890
+
1891
+ # Setting volatile and persist parameters for performing transformation
1892
+ volatile, persist = self._get_generic_parameters(func_indicator="MathameticalTransformationIndicator",
1893
+ param_name="MathameticalTransformationParam")
1894
+ # Adding fit parameters for performing transformation
1895
+ fit_params={
1896
+ "data": self.data,
1897
+ "target_columns" : target_col,
1898
+ "apply_method" : apply_method,
1899
+ "inplace" : True,
1900
+ "persist" :True,
1901
+ "accumulate" : accumulate_columns
1902
+ }
1903
+ # Disabling display table name if persist is True by default
1904
+ if not volatile and not persist:
1905
+ fit_params["display_table_name"] = False
1906
+
1907
+ if volatile:
1908
+ fit_params["volatile"] = True
1909
+ fit_params["persist"] = False
1910
+ # Adding addition details for fit parameters in case of SIGMOID transformation
1911
+ if apply_method == "sigmoid":
1912
+ sigmoid_style=transform_val["sigmoid_style"]
1913
+ fit_params = {**fit_params, "sigmoid_style" : sigmoid_style}
1914
+ # Performing transformation on target columns
1915
+ transform_output = NumApply(**fit_params).result
1916
+ if not volatile and not persist:
1917
+ # Adding transformed data containing table to garbage collector
1918
+ GarbageCollector._add_to_garbagecollector(transform_output._table_name)
1919
+ return transform_output
1920
+
1921
+     def _numerical_transformation(self, target_columns, num_transform_data, volatile, persist):
+         """
+         DESCRIPTION:
+             Function to perform different numerical transformations using Fit and Transform on numerical features based on user input.
+
+         PARAMETERS:
+             target_columns:
+                 Required Argument.
+                 Specifies the target columns for numerical transformation.
+                 Types: list
+
+             num_transform_data:
+                 Required Argument.
+                 Specifies the DataFrame holding the numerical transformation details.
+                 Types: teradataml DataFrame
+
+             volatile:
+                 Required Argument.
+                 Specifies whether to use volatile tables.
+                 Types: bool
+
+             persist:
+                 Required Argument.
+                 Specifies whether to persist results.
+                 Types: bool
+
+         RETURNS:
+             None
+
+         RAISES:
+             None
+
+         EXAMPLES:
+             >>> self._numerical_transformation(target_columns=["col1"], num_transform_data=transform_obj, volatile=False, persist=True)
+         """
+         # Adding fit parameters for transformation
+         fit_params = {
+             "data" : self.data,
+             "object" : num_transform_data,
+             "object_order_column" : "TargetColumn",
+             "volatile" : volatile,
+             "persist" : persist
+         }
+         # Performing fit with all arguments.
+         num_fit_obj = Fit(**fit_params)
+         # Fetching all numerical columns
+         numerical_columns = [col for col, d_type in self.data._column_names_and_types if d_type in ["int", "float"]]
+         # Extracting id columns, i.e., numerical columns that the transformation should leave untouched
+         id_columns = self._extract_list(numerical_columns, target_columns)
+         # Storing fit object and id column list for numerical transformation in data transform dictionary
+         self.data_transform_dict['custom_numerical_transformation_fit_object'] = num_fit_obj.result
+         self.data_transform_dict['custom_numerical_transformation_id_columns'] = id_columns
+         # Adding transform parameters for transformation
+         transform_params = {
+             "data" : self.data,
+             "object" : num_fit_obj.result,
+             "id_columns" : id_columns,
+             "persist" : True
+         }
+         # Disabling table name display, since persist is enabled only internally here
+         if not volatile and not persist:
+             transform_params["display_table_name"] = False
+
+         if volatile:
+             transform_params["volatile"] = True
+             transform_params["persist"] = False
+         # Performing transformation on target columns
+         self.data = Transform(**transform_params).result
+         if not volatile and not persist:
+             # Adding the table containing transformed data to the garbage collector
+             GarbageCollector._add_to_garbagecollector(self.data._table_name)
+
+         self.data_mapping['fit_numerical_result'] = num_fit_obj.result._table_name
+         self.data_mapping['numerical_transformed_data'] = self.data._table_name
+         self._display_msg(msg="Updated dataset sample after applying numerical transformation:",
+                           data=self.data,
+                           progress_bar=self.progress_bar)
+
+     def _mathematical_transformation(self):
+         """
+         DESCRIPTION:
+             Function to perform different mathematical transformations (i.e., log, pow,
+             exp, sininv, sigmoid) on numerical features based on user input.
+
+         PARAMETERS:
+             None
+
+         RETURNS:
+             None
+
+         RAISES:
+             TeradataMlException
+
+         EXAMPLES:
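+             >>> # Hypothetical "MathameticalTransformationParam" input, sketched
+             >>> # from the parsing below; names and values are illustrative only.
+             >>> self.custom_data["MathameticalTransformationParam"] = {
+             ...     "balance": {"apply_method": "log", "base": 10},
+             ...     "age": {"apply_method": "pow", "exponent": 2},
+             ...     "score": {"apply_method": "sigmoid", "sigmoid_style": "logit"}}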
+             >>> self._mathematical_transformation()
+         """
+         self._display_msg(msg="Starting customized mathematical transformation ...",
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+
+         mat_transform_input = self.custom_data.get("MathameticalTransformationIndicator", False)
+         # Checking user input for mathematical transformations
+         if mat_transform_input:
+             # Extracting list required for mathematical transformations.
+             # Guarding against a missing key before copying.
+             mat_transform_list = (self.custom_data.get("MathameticalTransformationParam") or {}).copy()
+
+             if mat_transform_list:
+                 volatile = mat_transform_list.pop("volatile", False)
+                 persist = mat_transform_list.pop("persist", False)
+                 # Checking whether the columns are present in the dataset
+                 _Validators._validate_dataframe_has_argument_columns(list(mat_transform_list.keys()),
+                                                                      "MathameticalTransformationParam", self.data, "df")
+
+                 # Lists for storing target columns and mathematical transformation information
+                 transform_data = []
+                 target_columns = []
+                 # Storing custom mathematical transformation indicator in data transform dictionary
+                 self.data_transform_dict['custom_mathematical_transformation_ind'] = True
+                 # Storing custom numapply transformation parameters in data transform dictionary
+                 self.data_transform_dict['custom_numapply_transformation_param'] = {}
+
+                 for col, transform_val in mat_transform_list.items():
+                     apply_method = transform_val["apply_method"]
+                     if apply_method in ["sininv", "sigmoid"]:
+                         # Applying numapply transformation
+                         self.data = self._numapply_transformation(col, transform_val)
+                         self.data_mapping[f'{apply_method}_transformed_data'] = self.data._table_name
+                         self._display_msg(msg="Updated dataset sample after applying numapply transformation:",
+                                           data=self.data,
+                                           progress_bar=self.progress_bar)
+                         # Updating parameter details for each column
+                         self.data_transform_dict['custom_numapply_transformation_param'].update({col: transform_val})
+                     else:
+                         # Handling specific scenarios for log and pow transformations
+                         parameters = ""
+                         if apply_method == "log":
+                             base = transform_val["base"]
+                             parameters = json.dumps({"base": base})
+                         elif apply_method == "pow":
+                             exponent = transform_val["exponent"]
+                             parameters = json.dumps({"exponent": exponent})
+                         target_columns.append(col)
+                         transform_data.append({"TargetColumn": col, "DefaultValue": 1, "Transformation": apply_method, "Parameters": parameters})
+                 # Checking for transformation data
+                 if len(transform_data):
+                     # Converting to a pandas DataFrame and then a teradataml DataFrame for further operations
+                     transform_data = pd.DataFrame(transform_data, columns=["TargetColumn", "DefaultValue", "Transformation", "Parameters"])
+                     self._display_msg(msg="Numerical transformation information:",
+                                       data=transform_data,
+                                       progress_bar=self.progress_bar)
+                     copy_to_sql(df=transform_data, table_name="automl_num_transform_data", temporary=True)
+                     num_transform_data = DataFrame.from_table("automl_num_transform_data")
+                     # Applying transformation using Fit/Transform functions
+                     self._numerical_transformation(target_columns, num_transform_data, volatile, persist)
+                     # Storing custom numerical transformation parameters and column list in data transform dictionary
+                     self.data_transform_dict['custom_numerical_transformation_col'] = target_columns
+                     self.data_transform_dict['custom_numerical_transformation_params'] = num_transform_data
+             else:
+                 self._display_msg(inline_msg="No input provided for performing customized mathematical transformation.",
+                                   progress_bar=self.progress_bar)
+         else:
+             self._display_msg(inline_msg="Skipping customized mathematical transformation.",
+                               progress_bar=self.progress_bar)
+
+     def _non_linear_transformation(self):
+         """
+         DESCRIPTION:
+             Function to perform customized non-linear transformation on numerical features based on user input.
+
+         PARAMETERS:
+             None
+
+         RETURNS:
+             None
+
+         RAISES:
+             TeradataMlException
+
+         EXAMPLES:
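+             >>> # Hypothetical "NonLinearTransformationParam" input, sketched
+             >>> # from the parsing below; names, the formula string and values
+             >>> # are illustrative only.
+             >>> self.custom_data["NonLinearTransformationParam"] = {
+             ...     "Combination_1": {"target_columns": ["x1", "x2"],
+             ...                       "formula": "Y = X0*X1",
+             ...                       "result_column": "x1_x2_product"}}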
+             >>> self._non_linear_transformation()
+         """
+         self._display_msg(msg="Starting customized non-linear transformation ...",
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+         nl_transform_input = self.custom_data.get("NonLinearTransformationIndicator", False)
+         # Checking user input for non-linear transformation
+         if nl_transform_input:
+             # Extracting list required for non-linear transformation
+             nl_transform_list = self.custom_data.get("NonLinearTransformationParam", None)
+             if nl_transform_list:
+                 volatile = nl_transform_list.pop("volatile", False)
+                 persist = nl_transform_list.pop("persist", False)
+                 total_combination = len(nl_transform_list)
+                 # Generating all possible combination names
+                 possible_combination = ["Combination_" + str(counter) for counter in range(1, total_combination + 1)]
+                 self._display_msg(msg="Possible combinations:",
+                                   col_lst=possible_combination,
+                                   progress_bar=self.progress_bar)
+                 # Storing custom non-linear transformation indicator in data transform dictionary
+                 self.data_transform_dict['custom_non_linear_transformation_ind'] = True
+                 # Storing custom non-linear transformation fit object in data transform dictionary
+                 self.data_transform_dict['custom_non_linear_transformation_fit_object'] = {}
+                 # Performing transformation for each combination
+                 for comb, transform_val in nl_transform_list.items():
+                     if comb in possible_combination:
+                         target_columns = transform_val["target_columns"]
+                         # Checking whether the columns are present in the dataset
+                         _Validators._validate_dataframe_has_argument_columns(target_columns,
+                                                                              "target_columns", self.data, "df")
+
+                         formula = transform_val["formula"]
+                         result_column = transform_val["result_column"]
+                         # Adding fit params for transformation
+                         fit_param = {
+                             "data" : self.data,
+                             "target_columns" : target_columns,
+                             "formula" : formula,
+                             "result_column" : result_column,
+                             "volatile" : volatile,
+                             "persist" : persist
+                         }
+                         # Performing fit on dataset
+                         fit_obj = NonLinearCombineFit(**fit_param)
+                         # Updating the stored fit object for each non-linear combination
+                         self.data_transform_dict['custom_non_linear_transformation_fit_object'].update({comb: fit_obj.result})
+                         # Adding transform params for transformation
+                         transform_params = {
+                             "data" : self.data,
+                             "object" : fit_obj,
+                             "accumulate" : self.data.columns,
+                             "persist" : True
+                         }
+                         # Disabling table name display, since persist is enabled only internally here
+                         if not volatile and not persist:
+                             transform_params["display_table_name"] = False
+
+                         if volatile:
+                             transform_params["volatile"] = True
+                             transform_params["persist"] = False
+                         self.data = NonLinearCombineTransform(**transform_params).result
+
+                         self.data_mapping[f'fit_nonlinear_{comb}_output'] = fit_obj.output_data._table_name
+                         self.data_mapping[f'fit_nonlinear_{comb}_result'] = fit_obj.result._table_name
+                         self.data_mapping['non_linear_transformed_data'] = self.data._table_name
+
+                         if not volatile and not persist:
+                             # Adding the table containing transformed data to the garbage collector
+                             GarbageCollector._add_to_garbagecollector(self.data._table_name)
+                     else:
+                         self._display_msg(inline_msg="Provided combination names are not as expected.",
+                                           progress_bar=self.progress_bar)
+                 self._display_msg(msg="Updated dataset sample after performing non-linear transformation:",
+                                   data=self.data,
+                                   progress_bar=self.progress_bar)
+             else:
+                 self._display_msg(inline_msg="No information provided for performing customized non-linear transformation.",
+                                   progress_bar=self.progress_bar)
+         else:
+             self._display_msg(inline_msg="Skipping customized non-linear transformation.",
+                               progress_bar=self.progress_bar)
+
+     def _anti_select_columns(self):
+         """
+         DESCRIPTION:
+             Function to remove specific features from dataset based on user input.
+
+         PARAMETERS:
+             None
+
+         RETURNS:
+             None
+
+         RAISES:
+             None
+
+         EXAMPLES:
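+             >>> # Hypothetical "AntiselectParam" input, sketched from the
+             >>> # parsing below; column names are illustrative only.
+             >>> self.custom_data["AntiselectParam"] = {
+             ...     "excluded_columns": ["row_id", "comments"]}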
+             >>> self._anti_select_columns()
+         """
+         self._display_msg(msg="Starting customized anti-select columns ...",
+                           progress_bar=self.progress_bar,
+                           show_data=True)
+         anti_select_input = self.custom_data.get("AntiselectIndicator", False)
+         # Checking user input for anti-select columns
+         if anti_select_input:
+             anti_select_params = self.custom_data.get("AntiselectParam", None)
+             if anti_select_params:
+                 # Extracting list required for anti-select columns
+                 anti_select_list = anti_select_params.get("excluded_columns", None)
+                 volatile = anti_select_params.get("volatile", False)
+                 persist = anti_select_params.get("persist", False)
+                 if anti_select_list:
+                     if all(item in self.data.columns for item in anti_select_list):
+                         # Storing custom anti-select columns indicator and column list in data transform dictionary
+                         self.data_transform_dict['custom_anti_select_columns_ind'] = True
+                         self.data_transform_dict['custom_anti_select_columns'] = anti_select_list
+                         fit_params = {
+                             "data" : self.data,
+                             "exclude" : anti_select_list,
+                             "volatile" : volatile,
+                             "persist" : persist
+                         }
+                         # Performing transformation for given user input
+                         self.data = Antiselect(**fit_params).result
+                         self._display_msg(msg="Updated dataset sample after performing anti-select columns:",
+                                           data=self.data,
+                                           progress_bar=self.progress_bar)
+                     else:
+                         self._display_msg(msg="Columns provided in the list are not present in the dataset:",
+                                           col_lst=anti_select_list,
+                                           progress_bar=self.progress_bar)
+                 else:
+                     self._display_msg(inline_msg="No information provided for performing anti-select columns operation.",
+                                       progress_bar=self.progress_bar)
+         else:
+             self._display_msg(inline_msg="Skipping customized anti-select columns.",
+                               progress_bar=self.progress_bar)
+
+     def _get_generic_parameters(self,
+                                 func_indicator=None,
+                                 param_name=None):
+         """
+         DESCRIPTION:
+             Function to get the generic volatile and persist parameters.
+
+         PARAMETERS:
+             func_indicator:
+                 Optional Argument.
+                 Specifies the name of the function indicator.
+                 Types: str
+
+             param_name:
+                 Optional Argument.
+                 Specifies the name of the param which contains the generic parameters.
+                 Types: str
+
+         RETURNS:
+             Tuple containing the volatile and persist parameters.
+
+         RAISES:
+             None
+
+         EXAMPLES:
+             >>> volatile, persist = self._get_generic_parameters(func_indicator="CategoricalEncodingIndicator", param_name="CategoricalEncodingParam")
+         """
+         # Prioritizing the persist argument, then volatile
+         persist = self.persist
+         volatile = self.volatile or (configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE and persist is False)
+         if self.custom_data is not None and self.custom_data.get(func_indicator, False):
+             # Guarding against a missing param dictionary before reading from it
+             params = self.custom_data.get(param_name) or {}
+             volatile = params.get("volatile", False)
+             persist = params.get("persist", False)
+
+         return (volatile, persist)