teradataml 20.0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1208)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +2762 -0
  4. teradataml/__init__.py +78 -0
  5. teradataml/_version.py +11 -0
  6. teradataml/analytics/Transformations.py +2996 -0
  7. teradataml/analytics/__init__.py +82 -0
  8. teradataml/analytics/analytic_function_executor.py +2416 -0
  9. teradataml/analytics/analytic_query_generator.py +1050 -0
  10. teradataml/analytics/byom/H2OPredict.py +514 -0
  11. teradataml/analytics/byom/PMMLPredict.py +437 -0
  12. teradataml/analytics/byom/__init__.py +16 -0
  13. teradataml/analytics/json_parser/__init__.py +133 -0
  14. teradataml/analytics/json_parser/analytic_functions_argument.py +1805 -0
  15. teradataml/analytics/json_parser/json_store.py +191 -0
  16. teradataml/analytics/json_parser/metadata.py +1666 -0
  17. teradataml/analytics/json_parser/utils.py +805 -0
  18. teradataml/analytics/meta_class.py +236 -0
  19. teradataml/analytics/sqle/DecisionTreePredict.py +456 -0
  20. teradataml/analytics/sqle/NaiveBayesPredict.py +420 -0
  21. teradataml/analytics/sqle/__init__.py +128 -0
  22. teradataml/analytics/sqle/json/decisiontreepredict_sqle.json +78 -0
  23. teradataml/analytics/sqle/json/naivebayespredict_sqle.json +62 -0
  24. teradataml/analytics/table_operator/__init__.py +11 -0
  25. teradataml/analytics/uaf/__init__.py +82 -0
  26. teradataml/analytics/utils.py +828 -0
  27. teradataml/analytics/valib.py +1617 -0
  28. teradataml/automl/__init__.py +5835 -0
  29. teradataml/automl/autodataprep/__init__.py +493 -0
  30. teradataml/automl/custom_json_utils.py +1625 -0
  31. teradataml/automl/data_preparation.py +1384 -0
  32. teradataml/automl/data_transformation.py +1254 -0
  33. teradataml/automl/feature_engineering.py +2273 -0
  34. teradataml/automl/feature_exploration.py +1873 -0
  35. teradataml/automl/model_evaluation.py +488 -0
  36. teradataml/automl/model_training.py +1407 -0
  37. teradataml/catalog/__init__.py +2 -0
  38. teradataml/catalog/byom.py +1759 -0
  39. teradataml/catalog/function_argument_mapper.py +859 -0
  40. teradataml/catalog/model_cataloging_utils.py +491 -0
  41. teradataml/clients/__init__.py +0 -0
  42. teradataml/clients/auth_client.py +137 -0
  43. teradataml/clients/keycloak_client.py +165 -0
  44. teradataml/clients/pkce_client.py +481 -0
  45. teradataml/common/__init__.py +1 -0
  46. teradataml/common/aed_utils.py +2078 -0
  47. teradataml/common/bulk_exposed_utils.py +113 -0
  48. teradataml/common/constants.py +1669 -0
  49. teradataml/common/deprecations.py +166 -0
  50. teradataml/common/exceptions.py +147 -0
  51. teradataml/common/formula.py +743 -0
  52. teradataml/common/garbagecollector.py +666 -0
  53. teradataml/common/logger.py +1261 -0
  54. teradataml/common/messagecodes.py +518 -0
  55. teradataml/common/messages.py +262 -0
  56. teradataml/common/pylogger.py +67 -0
  57. teradataml/common/sqlbundle.py +764 -0
  58. teradataml/common/td_coltype_code_to_tdtype.py +48 -0
  59. teradataml/common/utils.py +3166 -0
  60. teradataml/common/warnings.py +36 -0
  61. teradataml/common/wrapper_utils.py +625 -0
  62. teradataml/config/__init__.py +0 -0
  63. teradataml/config/dummy_file1.cfg +5 -0
  64. teradataml/config/dummy_file2.cfg +3 -0
  65. teradataml/config/sqlengine_alias_definitions_v1.0 +14 -0
  66. teradataml/config/sqlengine_alias_definitions_v1.1 +20 -0
  67. teradataml/config/sqlengine_alias_definitions_v1.3 +19 -0
  68. teradataml/context/__init__.py +0 -0
  69. teradataml/context/aed_context.py +223 -0
  70. teradataml/context/context.py +1462 -0
  71. teradataml/data/A_loan.csv +19 -0
  72. teradataml/data/BINARY_REALS_LEFT.csv +11 -0
  73. teradataml/data/BINARY_REALS_RIGHT.csv +11 -0
  74. teradataml/data/B_loan.csv +49 -0
  75. teradataml/data/BuoyData2.csv +17 -0
  76. teradataml/data/CONVOLVE2_COMPLEX_LEFT.csv +5 -0
  77. teradataml/data/CONVOLVE2_COMPLEX_RIGHT.csv +5 -0
  78. teradataml/data/Convolve2RealsLeft.csv +5 -0
  79. teradataml/data/Convolve2RealsRight.csv +5 -0
  80. teradataml/data/Convolve2ValidLeft.csv +11 -0
  81. teradataml/data/Convolve2ValidRight.csv +11 -0
  82. teradataml/data/DFFTConv_Real_8_8.csv +65 -0
  83. teradataml/data/Employee.csv +5 -0
  84. teradataml/data/Employee_Address.csv +4 -0
  85. teradataml/data/Employee_roles.csv +5 -0
  86. teradataml/data/JulesBelvezeDummyData.csv +100 -0
  87. teradataml/data/Mall_customer_data.csv +201 -0
  88. teradataml/data/Orders1_12mf.csv +25 -0
  89. teradataml/data/Pi_loan.csv +7 -0
  90. teradataml/data/SMOOTHED_DATA.csv +7 -0
  91. teradataml/data/TestDFFT8.csv +9 -0
  92. teradataml/data/TestRiver.csv +109 -0
  93. teradataml/data/Traindata.csv +28 -0
  94. teradataml/data/__init__.py +0 -0
  95. teradataml/data/acf.csv +17 -0
  96. teradataml/data/adaboost_example.json +34 -0
  97. teradataml/data/adaboostpredict_example.json +24 -0
  98. teradataml/data/additional_table.csv +11 -0
  99. teradataml/data/admissions_test.csv +21 -0
  100. teradataml/data/admissions_train.csv +41 -0
  101. teradataml/data/admissions_train_nulls.csv +41 -0
  102. teradataml/data/advertising.csv +201 -0
  103. teradataml/data/ageandheight.csv +13 -0
  104. teradataml/data/ageandpressure.csv +31 -0
  105. teradataml/data/amazon_reviews_25.csv +26 -0
  106. teradataml/data/antiselect_example.json +36 -0
  107. teradataml/data/antiselect_input.csv +8 -0
  108. teradataml/data/antiselect_input_mixed_case.csv +8 -0
  109. teradataml/data/applicant_external.csv +7 -0
  110. teradataml/data/applicant_reference.csv +7 -0
  111. teradataml/data/apriori_example.json +22 -0
  112. teradataml/data/arima_example.json +9 -0
  113. teradataml/data/assortedtext_input.csv +8 -0
  114. teradataml/data/attribution_example.json +34 -0
  115. teradataml/data/attribution_sample_table.csv +27 -0
  116. teradataml/data/attribution_sample_table1.csv +6 -0
  117. teradataml/data/attribution_sample_table2.csv +11 -0
  118. teradataml/data/bank_churn.csv +10001 -0
  119. teradataml/data/bank_marketing.csv +11163 -0
  120. teradataml/data/bank_web_clicks1.csv +43 -0
  121. teradataml/data/bank_web_clicks2.csv +91 -0
  122. teradataml/data/bank_web_url.csv +85 -0
  123. teradataml/data/barrier.csv +2 -0
  124. teradataml/data/barrier_new.csv +3 -0
  125. teradataml/data/betweenness_example.json +14 -0
  126. teradataml/data/bike_sharing.csv +732 -0
  127. teradataml/data/bin_breaks.csv +8 -0
  128. teradataml/data/bin_fit_ip.csv +4 -0
  129. teradataml/data/binary_complex_left.csv +11 -0
  130. teradataml/data/binary_complex_right.csv +11 -0
  131. teradataml/data/binary_matrix_complex_left.csv +21 -0
  132. teradataml/data/binary_matrix_complex_right.csv +21 -0
  133. teradataml/data/binary_matrix_real_left.csv +21 -0
  134. teradataml/data/binary_matrix_real_right.csv +21 -0
  135. teradataml/data/blood2ageandweight.csv +26 -0
  136. teradataml/data/bmi.csv +501 -0
  137. teradataml/data/boston.csv +507 -0
  138. teradataml/data/boston2cols.csv +721 -0
  139. teradataml/data/breast_cancer.csv +570 -0
  140. teradataml/data/buoydata_mix.csv +11 -0
  141. teradataml/data/burst_data.csv +5 -0
  142. teradataml/data/burst_example.json +21 -0
  143. teradataml/data/byom_example.json +34 -0
  144. teradataml/data/bytes_table.csv +4 -0
  145. teradataml/data/cal_housing_ex_raw.csv +70 -0
  146. teradataml/data/callers.csv +7 -0
  147. teradataml/data/calls.csv +10 -0
  148. teradataml/data/cars_hist.csv +33 -0
  149. teradataml/data/cat_table.csv +25 -0
  150. teradataml/data/ccm_example.json +32 -0
  151. teradataml/data/ccm_input.csv +91 -0
  152. teradataml/data/ccm_input2.csv +13 -0
  153. teradataml/data/ccmexample.csv +101 -0
  154. teradataml/data/ccmprepare_example.json +9 -0
  155. teradataml/data/ccmprepare_input.csv +91 -0
  156. teradataml/data/cfilter_example.json +12 -0
  157. teradataml/data/changepointdetection_example.json +18 -0
  158. teradataml/data/changepointdetectionrt_example.json +8 -0
  159. teradataml/data/chi_sq.csv +3 -0
  160. teradataml/data/churn_data.csv +14 -0
  161. teradataml/data/churn_emission.csv +35 -0
  162. teradataml/data/churn_initial.csv +3 -0
  163. teradataml/data/churn_state_transition.csv +5 -0
  164. teradataml/data/citedges_2.csv +745 -0
  165. teradataml/data/citvertices_2.csv +1210 -0
  166. teradataml/data/clicks2.csv +16 -0
  167. teradataml/data/clickstream.csv +13 -0
  168. teradataml/data/clickstream1.csv +11 -0
  169. teradataml/data/closeness_example.json +16 -0
  170. teradataml/data/complaints.csv +21 -0
  171. teradataml/data/complaints_mini.csv +3 -0
  172. teradataml/data/complaints_test_tokenized.csv +353 -0
  173. teradataml/data/complaints_testtoken.csv +224 -0
  174. teradataml/data/complaints_tokens_model.csv +348 -0
  175. teradataml/data/complaints_tokens_test.csv +353 -0
  176. teradataml/data/complaints_traintoken.csv +472 -0
  177. teradataml/data/computers_category.csv +1001 -0
  178. teradataml/data/computers_test1.csv +1252 -0
  179. teradataml/data/computers_train1.csv +5009 -0
  180. teradataml/data/computers_train1_clustered.csv +5009 -0
  181. teradataml/data/confusionmatrix_example.json +9 -0
  182. teradataml/data/conversion_event_table.csv +3 -0
  183. teradataml/data/corr_input.csv +17 -0
  184. teradataml/data/correlation_example.json +11 -0
  185. teradataml/data/covid_confirm_sd.csv +83 -0
  186. teradataml/data/coxhazardratio_example.json +39 -0
  187. teradataml/data/coxph_example.json +15 -0
  188. teradataml/data/coxsurvival_example.json +28 -0
  189. teradataml/data/cpt.csv +41 -0
  190. teradataml/data/credit_ex_merged.csv +45 -0
  191. teradataml/data/creditcard_data.csv +1001 -0
  192. teradataml/data/customer_loyalty.csv +301 -0
  193. teradataml/data/customer_loyalty_newseq.csv +31 -0
  194. teradataml/data/customer_segmentation_test.csv +2628 -0
  195. teradataml/data/customer_segmentation_train.csv +8069 -0
  196. teradataml/data/dataframe_example.json +173 -0
  197. teradataml/data/decisionforest_example.json +37 -0
  198. teradataml/data/decisionforestpredict_example.json +38 -0
  199. teradataml/data/decisiontree_example.json +21 -0
  200. teradataml/data/decisiontreepredict_example.json +45 -0
  201. teradataml/data/dfft2_size4_real.csv +17 -0
  202. teradataml/data/dfft2_test_matrix16.csv +17 -0
  203. teradataml/data/dfft2conv_real_4_4.csv +65 -0
  204. teradataml/data/diabetes.csv +443 -0
  205. teradataml/data/diabetes_test.csv +89 -0
  206. teradataml/data/dict_table.csv +5 -0
  207. teradataml/data/docperterm_table.csv +4 -0
  208. teradataml/data/docs/__init__.py +1 -0
  209. teradataml/data/docs/byom/__init__.py +0 -0
  210. teradataml/data/docs/byom/docs/DataRobotPredict.py +180 -0
  211. teradataml/data/docs/byom/docs/DataikuPredict.py +217 -0
  212. teradataml/data/docs/byom/docs/H2OPredict.py +325 -0
  213. teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
  214. teradataml/data/docs/byom/docs/ONNXPredict.py +283 -0
  215. teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
  216. teradataml/data/docs/byom/docs/PMMLPredict.py +278 -0
  217. teradataml/data/docs/byom/docs/__init__.py +0 -0
  218. teradataml/data/docs/sqle/__init__.py +0 -0
  219. teradataml/data/docs/sqle/docs_17_10/Antiselect.py +83 -0
  220. teradataml/data/docs/sqle/docs_17_10/Attribution.py +200 -0
  221. teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +172 -0
  222. teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +131 -0
  223. teradataml/data/docs/sqle/docs_17_10/CategoricalSummary.py +86 -0
  224. teradataml/data/docs/sqle/docs_17_10/ChiSq.py +90 -0
  225. teradataml/data/docs/sqle/docs_17_10/ColumnSummary.py +86 -0
  226. teradataml/data/docs/sqle/docs_17_10/ConvertTo.py +96 -0
  227. teradataml/data/docs/sqle/docs_17_10/DecisionForestPredict.py +139 -0
  228. teradataml/data/docs/sqle/docs_17_10/DecisionTreePredict.py +152 -0
  229. teradataml/data/docs/sqle/docs_17_10/FTest.py +161 -0
  230. teradataml/data/docs/sqle/docs_17_10/FillRowId.py +83 -0
  231. teradataml/data/docs/sqle/docs_17_10/Fit.py +88 -0
  232. teradataml/data/docs/sqle/docs_17_10/GLMPredict.py +144 -0
  233. teradataml/data/docs/sqle/docs_17_10/GetRowsWithMissingValues.py +85 -0
  234. teradataml/data/docs/sqle/docs_17_10/GetRowsWithoutMissingValues.py +82 -0
  235. teradataml/data/docs/sqle/docs_17_10/Histogram.py +165 -0
  236. teradataml/data/docs/sqle/docs_17_10/MovingAverage.py +134 -0
  237. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +209 -0
  238. teradataml/data/docs/sqle/docs_17_10/NPath.py +266 -0
  239. teradataml/data/docs/sqle/docs_17_10/NaiveBayesPredict.py +116 -0
  240. teradataml/data/docs/sqle/docs_17_10/NaiveBayesTextClassifierPredict.py +176 -0
  241. teradataml/data/docs/sqle/docs_17_10/NumApply.py +147 -0
  242. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +135 -0
  243. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +109 -0
  244. teradataml/data/docs/sqle/docs_17_10/OutlierFilterFit.py +166 -0
  245. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +105 -0
  246. teradataml/data/docs/sqle/docs_17_10/Pack.py +128 -0
  247. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesFit.py +112 -0
  248. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +102 -0
  249. teradataml/data/docs/sqle/docs_17_10/QQNorm.py +105 -0
  250. teradataml/data/docs/sqle/docs_17_10/RoundColumns.py +110 -0
  251. teradataml/data/docs/sqle/docs_17_10/RowNormalizeFit.py +118 -0
  252. teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +99 -0
  253. teradataml/data/docs/sqle/docs_17_10/SVMSparsePredict.py +153 -0
  254. teradataml/data/docs/sqle/docs_17_10/ScaleFit.py +197 -0
  255. teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +99 -0
  256. teradataml/data/docs/sqle/docs_17_10/Sessionize.py +114 -0
  257. teradataml/data/docs/sqle/docs_17_10/SimpleImputeFit.py +116 -0
  258. teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +98 -0
  259. teradataml/data/docs/sqle/docs_17_10/StrApply.py +187 -0
  260. teradataml/data/docs/sqle/docs_17_10/StringSimilarity.py +146 -0
  261. teradataml/data/docs/sqle/docs_17_10/Transform.py +105 -0
  262. teradataml/data/docs/sqle/docs_17_10/UnivariateStatistics.py +142 -0
  263. teradataml/data/docs/sqle/docs_17_10/Unpack.py +214 -0
  264. teradataml/data/docs/sqle/docs_17_10/WhichMax.py +83 -0
  265. teradataml/data/docs/sqle/docs_17_10/WhichMin.py +83 -0
  266. teradataml/data/docs/sqle/docs_17_10/ZTest.py +155 -0
  267. teradataml/data/docs/sqle/docs_17_10/__init__.py +0 -0
  268. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +186 -0
  269. teradataml/data/docs/sqle/docs_17_20/Antiselect.py +83 -0
  270. teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
  271. teradataml/data/docs/sqle/docs_17_20/Attribution.py +201 -0
  272. teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +172 -0
  273. teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +139 -0
  274. teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
  275. teradataml/data/docs/sqle/docs_17_20/CategoricalSummary.py +86 -0
  276. teradataml/data/docs/sqle/docs_17_20/ChiSq.py +90 -0
  277. teradataml/data/docs/sqle/docs_17_20/ClassificationEvaluator.py +166 -0
  278. teradataml/data/docs/sqle/docs_17_20/ColumnSummary.py +86 -0
  279. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +246 -0
  280. teradataml/data/docs/sqle/docs_17_20/ConvertTo.py +113 -0
  281. teradataml/data/docs/sqle/docs_17_20/DecisionForest.py +280 -0
  282. teradataml/data/docs/sqle/docs_17_20/DecisionForestPredict.py +144 -0
  283. teradataml/data/docs/sqle/docs_17_20/DecisionTreePredict.py +136 -0
  284. teradataml/data/docs/sqle/docs_17_20/FTest.py +240 -0
  285. teradataml/data/docs/sqle/docs_17_20/FillRowId.py +83 -0
  286. teradataml/data/docs/sqle/docs_17_20/Fit.py +88 -0
  287. teradataml/data/docs/sqle/docs_17_20/GLM.py +541 -0
  288. teradataml/data/docs/sqle/docs_17_20/GLMPerSegment.py +415 -0
  289. teradataml/data/docs/sqle/docs_17_20/GLMPredict.py +144 -0
  290. teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +233 -0
  291. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +125 -0
  292. teradataml/data/docs/sqle/docs_17_20/GetRowsWithMissingValues.py +109 -0
  293. teradataml/data/docs/sqle/docs_17_20/GetRowsWithoutMissingValues.py +106 -0
  294. teradataml/data/docs/sqle/docs_17_20/Histogram.py +224 -0
  295. teradataml/data/docs/sqle/docs_17_20/KMeans.py +251 -0
  296. teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +144 -0
  297. teradataml/data/docs/sqle/docs_17_20/KNN.py +215 -0
  298. teradataml/data/docs/sqle/docs_17_20/MovingAverage.py +134 -0
  299. teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
  300. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +209 -0
  301. teradataml/data/docs/sqle/docs_17_20/NPath.py +266 -0
  302. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
  303. teradataml/data/docs/sqle/docs_17_20/NaiveBayesPredict.py +116 -0
  304. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +177 -0
  305. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +127 -0
  306. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +119 -0
  307. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +112 -0
  308. teradataml/data/docs/sqle/docs_17_20/NumApply.py +147 -0
  309. teradataml/data/docs/sqle/docs_17_20/OneClassSVM.py +307 -0
  310. teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +185 -0
  311. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +231 -0
  312. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +121 -0
  313. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingFit.py +220 -0
  314. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingTransform.py +127 -0
  315. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +191 -0
  316. teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +117 -0
  317. teradataml/data/docs/sqle/docs_17_20/Pack.py +128 -0
  318. teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
  319. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesFit.py +112 -0
  320. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +112 -0
  321. teradataml/data/docs/sqle/docs_17_20/QQNorm.py +105 -0
  322. teradataml/data/docs/sqle/docs_17_20/ROC.py +164 -0
  323. teradataml/data/docs/sqle/docs_17_20/RandomProjectionFit.py +155 -0
  324. teradataml/data/docs/sqle/docs_17_20/RandomProjectionMinComponents.py +106 -0
  325. teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +120 -0
  326. teradataml/data/docs/sqle/docs_17_20/RegressionEvaluator.py +211 -0
  327. teradataml/data/docs/sqle/docs_17_20/RoundColumns.py +109 -0
  328. teradataml/data/docs/sqle/docs_17_20/RowNormalizeFit.py +118 -0
  329. teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +111 -0
  330. teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
  331. teradataml/data/docs/sqle/docs_17_20/SVM.py +414 -0
  332. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +213 -0
  333. teradataml/data/docs/sqle/docs_17_20/SVMSparsePredict.py +153 -0
  334. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +315 -0
  335. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +202 -0
  336. teradataml/data/docs/sqle/docs_17_20/SentimentExtractor.py +206 -0
  337. teradataml/data/docs/sqle/docs_17_20/Sessionize.py +114 -0
  338. teradataml/data/docs/sqle/docs_17_20/Shap.py +225 -0
  339. teradataml/data/docs/sqle/docs_17_20/Silhouette.py +153 -0
  340. teradataml/data/docs/sqle/docs_17_20/SimpleImputeFit.py +116 -0
  341. teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +109 -0
  342. teradataml/data/docs/sqle/docs_17_20/StrApply.py +187 -0
  343. teradataml/data/docs/sqle/docs_17_20/StringSimilarity.py +146 -0
  344. teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +207 -0
  345. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +333 -0
  346. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
  347. teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
  348. teradataml/data/docs/sqle/docs_17_20/TargetEncodingFit.py +267 -0
  349. teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +141 -0
  350. teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
  351. teradataml/data/docs/sqle/docs_17_20/TextParser.py +224 -0
  352. teradataml/data/docs/sqle/docs_17_20/TrainTestSplit.py +160 -0
  353. teradataml/data/docs/sqle/docs_17_20/Transform.py +123 -0
  354. teradataml/data/docs/sqle/docs_17_20/UnivariateStatistics.py +142 -0
  355. teradataml/data/docs/sqle/docs_17_20/Unpack.py +214 -0
  356. teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
  357. teradataml/data/docs/sqle/docs_17_20/VectorDistance.py +169 -0
  358. teradataml/data/docs/sqle/docs_17_20/WhichMax.py +83 -0
  359. teradataml/data/docs/sqle/docs_17_20/WhichMin.py +83 -0
  360. teradataml/data/docs/sqle/docs_17_20/WordEmbeddings.py +237 -0
  361. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +362 -0
  362. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +281 -0
  363. teradataml/data/docs/sqle/docs_17_20/ZTest.py +220 -0
  364. teradataml/data/docs/sqle/docs_17_20/__init__.py +0 -0
  365. teradataml/data/docs/tableoperator/__init__.py +0 -0
  366. teradataml/data/docs/tableoperator/docs_17_00/ReadNOS.py +430 -0
  367. teradataml/data/docs/tableoperator/docs_17_00/__init__.py +0 -0
  368. teradataml/data/docs/tableoperator/docs_17_05/ReadNOS.py +430 -0
  369. teradataml/data/docs/tableoperator/docs_17_05/WriteNOS.py +348 -0
  370. teradataml/data/docs/tableoperator/docs_17_05/__init__.py +0 -0
  371. teradataml/data/docs/tableoperator/docs_17_10/ReadNOS.py +429 -0
  372. teradataml/data/docs/tableoperator/docs_17_10/WriteNOS.py +348 -0
  373. teradataml/data/docs/tableoperator/docs_17_10/__init__.py +0 -0
  374. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  375. teradataml/data/docs/tableoperator/docs_17_20/ReadNOS.py +440 -0
  376. teradataml/data/docs/tableoperator/docs_17_20/WriteNOS.py +387 -0
  377. teradataml/data/docs/tableoperator/docs_17_20/__init__.py +0 -0
  378. teradataml/data/docs/uaf/__init__.py +0 -0
  379. teradataml/data/docs/uaf/docs_17_20/ACF.py +186 -0
  380. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +370 -0
  381. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +172 -0
  382. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +161 -0
  383. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
  384. teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
  385. teradataml/data/docs/uaf/docs_17_20/BinaryMatrixOp.py +248 -0
  386. teradataml/data/docs/uaf/docs_17_20/BinarySeriesOp.py +252 -0
  387. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +178 -0
  388. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +175 -0
  389. teradataml/data/docs/uaf/docs_17_20/Convolve.py +230 -0
  390. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +218 -0
  391. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  392. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +185 -0
  393. teradataml/data/docs/uaf/docs_17_20/DFFT.py +204 -0
  394. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +216 -0
  395. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +216 -0
  396. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +192 -0
  397. teradataml/data/docs/uaf/docs_17_20/DIFF.py +175 -0
  398. teradataml/data/docs/uaf/docs_17_20/DTW.py +180 -0
  399. teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
  400. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +217 -0
  401. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +142 -0
  402. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +184 -0
  403. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +185 -0
  404. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
  405. teradataml/data/docs/uaf/docs_17_20/FitMetrics.py +172 -0
  406. teradataml/data/docs/uaf/docs_17_20/GenseriesFormula.py +206 -0
  407. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +143 -0
  408. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +198 -0
  409. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +260 -0
  410. teradataml/data/docs/uaf/docs_17_20/IDFFT.py +165 -0
  411. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +191 -0
  412. teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
  413. teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
  414. teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
  415. teradataml/data/docs/uaf/docs_17_20/InputValidator.py +121 -0
  416. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +156 -0
  417. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +215 -0
  418. teradataml/data/docs/uaf/docs_17_20/MAMean.py +174 -0
  419. teradataml/data/docs/uaf/docs_17_20/MInfo.py +134 -0
  420. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
  421. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +145 -0
  422. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +191 -0
  423. teradataml/data/docs/uaf/docs_17_20/PACF.py +157 -0
  424. teradataml/data/docs/uaf/docs_17_20/Portman.py +217 -0
  425. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +203 -0
  426. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +155 -0
  427. teradataml/data/docs/uaf/docs_17_20/Resample.py +237 -0
  428. teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
  429. teradataml/data/docs/uaf/docs_17_20/SInfo.py +123 -0
  430. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +173 -0
  431. teradataml/data/docs/uaf/docs_17_20/SelectionCriteria.py +174 -0
  432. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +171 -0
  433. teradataml/data/docs/uaf/docs_17_20/SignifResidmean.py +164 -0
  434. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +180 -0
  435. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +208 -0
  436. teradataml/data/docs/uaf/docs_17_20/TrackingOp.py +151 -0
  437. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +171 -0
  438. teradataml/data/docs/uaf/docs_17_20/Unnormalize.py +202 -0
  439. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +171 -0
  440. teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
  441. teradataml/data/docs/uaf/docs_17_20/__init__.py +0 -0
  442. teradataml/data/dtw_example.json +18 -0
  443. teradataml/data/dtw_t1.csv +11 -0
  444. teradataml/data/dtw_t2.csv +4 -0
  445. teradataml/data/dwt2d_dataTable.csv +65 -0
  446. teradataml/data/dwt2d_example.json +16 -0
  447. teradataml/data/dwt_dataTable.csv +8 -0
  448. teradataml/data/dwt_example.json +15 -0
  449. teradataml/data/dwt_filterTable.csv +3 -0
  450. teradataml/data/dwt_filter_dim.csv +5 -0
  451. teradataml/data/emission.csv +9 -0
  452. teradataml/data/emp_table_by_dept.csv +19 -0
  453. teradataml/data/employee_info.csv +4 -0
  454. teradataml/data/employee_table.csv +6 -0
  455. teradataml/data/excluding_event_table.csv +2 -0
  456. teradataml/data/finance_data.csv +6 -0
  457. teradataml/data/finance_data2.csv +61 -0
  458. teradataml/data/finance_data3.csv +93 -0
  459. teradataml/data/finance_data4.csv +13 -0
  460. teradataml/data/fish.csv +160 -0
  461. teradataml/data/fm_blood2ageandweight.csv +26 -0
  462. teradataml/data/fmeasure_example.json +12 -0
  463. teradataml/data/followers_leaders.csv +10 -0
  464. teradataml/data/fpgrowth_example.json +12 -0
  465. teradataml/data/frequentpaths_example.json +29 -0
  466. teradataml/data/friends.csv +9 -0
  467. teradataml/data/fs_input.csv +33 -0
  468. teradataml/data/fs_input1.csv +33 -0
  469. teradataml/data/genData.csv +513 -0
  470. teradataml/data/geodataframe_example.json +40 -0
  471. teradataml/data/glass_types.csv +215 -0
  472. teradataml/data/glm_admissions_model.csv +12 -0
  473. teradataml/data/glm_example.json +56 -0
  474. teradataml/data/glml1l2_example.json +28 -0
  475. teradataml/data/glml1l2predict_example.json +54 -0
  476. teradataml/data/glmpredict_example.json +54 -0
  477. teradataml/data/gq_t1.csv +21 -0
  478. teradataml/data/grocery_transaction.csv +19 -0
  479. teradataml/data/hconvolve_complex_right.csv +5 -0
  480. teradataml/data/hconvolve_complex_rightmulti.csv +5 -0
  481. teradataml/data/histogram_example.json +12 -0
  482. teradataml/data/hmmdecoder_example.json +79 -0
  483. teradataml/data/hmmevaluator_example.json +25 -0
  484. teradataml/data/hmmsupervised_example.json +10 -0
  485. teradataml/data/hmmunsupervised_example.json +8 -0
  486. teradataml/data/hnsw_alter_data.csv +5 -0
  487. teradataml/data/hnsw_data.csv +10 -0
  488. teradataml/data/house_values.csv +12 -0
  489. teradataml/data/house_values2.csv +13 -0
  490. teradataml/data/housing_cat.csv +7 -0
  491. teradataml/data/housing_data.csv +9 -0
  492. teradataml/data/housing_test.csv +47 -0
  493. teradataml/data/housing_test_binary.csv +47 -0
  494. teradataml/data/housing_train.csv +493 -0
  495. teradataml/data/housing_train_attribute.csv +5 -0
  496. teradataml/data/housing_train_binary.csv +437 -0
  497. teradataml/data/housing_train_parameter.csv +2 -0
  498. teradataml/data/housing_train_response.csv +493 -0
  499. teradataml/data/housing_train_segment.csv +201 -0
  500. teradataml/data/ibm_stock.csv +370 -0
  501. teradataml/data/ibm_stock1.csv +370 -0
  502. teradataml/data/identitymatch_example.json +22 -0
  503. teradataml/data/idf_table.csv +4 -0
  504. teradataml/data/idwt2d_dataTable.csv +5 -0
  505. teradataml/data/idwt_dataTable.csv +8 -0
  506. teradataml/data/idwt_filterTable.csv +3 -0
  507. teradataml/data/impressions.csv +101 -0
  508. teradataml/data/inflation.csv +21 -0
  509. teradataml/data/initial.csv +3 -0
  510. teradataml/data/insect2Cols.csv +61 -0
  511. teradataml/data/insect_sprays.csv +13 -0
  512. teradataml/data/insurance.csv +1339 -0
  513. teradataml/data/interpolator_example.json +13 -0
  514. teradataml/data/interval_data.csv +5 -0
  515. teradataml/data/iris_altinput.csv +481 -0
  516. teradataml/data/iris_attribute_output.csv +8 -0
  517. teradataml/data/iris_attribute_test.csv +121 -0
  518. teradataml/data/iris_attribute_train.csv +481 -0
  519. teradataml/data/iris_category_expect_predict.csv +31 -0
  520. teradataml/data/iris_data.csv +151 -0
  521. teradataml/data/iris_input.csv +151 -0
  522. teradataml/data/iris_response_train.csv +121 -0
  523. teradataml/data/iris_test.csv +31 -0
  524. teradataml/data/iris_train.csv +121 -0
  525. teradataml/data/join_table1.csv +4 -0
  526. teradataml/data/join_table2.csv +4 -0
  527. teradataml/data/jsons/anly_function_name.json +7 -0
  528. teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
  529. teradataml/data/jsons/byom/dataikupredict.json +148 -0
  530. teradataml/data/jsons/byom/datarobotpredict.json +147 -0
  531. teradataml/data/jsons/byom/h2opredict.json +195 -0
  532. teradataml/data/jsons/byom/onnxembeddings.json +267 -0
  533. teradataml/data/jsons/byom/onnxpredict.json +187 -0
  534. teradataml/data/jsons/byom/pmmlpredict.json +147 -0
  535. teradataml/data/jsons/paired_functions.json +450 -0
  536. teradataml/data/jsons/sqle/16.20/Antiselect.json +56 -0
  537. teradataml/data/jsons/sqle/16.20/Attribution.json +249 -0
  538. teradataml/data/jsons/sqle/16.20/DecisionForestPredict.json +156 -0
  539. teradataml/data/jsons/sqle/16.20/DecisionTreePredict.json +170 -0
  540. teradataml/data/jsons/sqle/16.20/GLMPredict.json +122 -0
  541. teradataml/data/jsons/sqle/16.20/MovingAverage.json +367 -0
  542. teradataml/data/jsons/sqle/16.20/NGramSplitter.json +239 -0
  543. teradataml/data/jsons/sqle/16.20/NaiveBayesPredict.json +136 -0
  544. teradataml/data/jsons/sqle/16.20/NaiveBayesTextClassifierPredict.json +235 -0
  545. teradataml/data/jsons/sqle/16.20/Pack.json +98 -0
  546. teradataml/data/jsons/sqle/16.20/SVMSparsePredict.json +162 -0
  547. teradataml/data/jsons/sqle/16.20/Sessionize.json +105 -0
  548. teradataml/data/jsons/sqle/16.20/StringSimilarity.json +86 -0
  549. teradataml/data/jsons/sqle/16.20/Unpack.json +166 -0
  550. teradataml/data/jsons/sqle/16.20/nPath.json +269 -0
  551. teradataml/data/jsons/sqle/17.00/Antiselect.json +56 -0
  552. teradataml/data/jsons/sqle/17.00/Attribution.json +249 -0
  553. teradataml/data/jsons/sqle/17.00/DecisionForestPredict.json +156 -0
  554. teradataml/data/jsons/sqle/17.00/DecisionTreePredict.json +170 -0
  555. teradataml/data/jsons/sqle/17.00/GLMPredict.json +122 -0
  556. teradataml/data/jsons/sqle/17.00/MovingAverage.json +367 -0
  557. teradataml/data/jsons/sqle/17.00/NGramSplitter.json +239 -0
  558. teradataml/data/jsons/sqle/17.00/NaiveBayesPredict.json +136 -0
  559. teradataml/data/jsons/sqle/17.00/NaiveBayesTextClassifierPredict.json +235 -0
  560. teradataml/data/jsons/sqle/17.00/Pack.json +98 -0
  561. teradataml/data/jsons/sqle/17.00/SVMSparsePredict.json +162 -0
  562. teradataml/data/jsons/sqle/17.00/Sessionize.json +105 -0
  563. teradataml/data/jsons/sqle/17.00/StringSimilarity.json +86 -0
  564. teradataml/data/jsons/sqle/17.00/Unpack.json +166 -0
  565. teradataml/data/jsons/sqle/17.00/nPath.json +269 -0
  566. teradataml/data/jsons/sqle/17.05/Antiselect.json +56 -0
  567. teradataml/data/jsons/sqle/17.05/Attribution.json +249 -0
  568. teradataml/data/jsons/sqle/17.05/DecisionForestPredict.json +156 -0
  569. teradataml/data/jsons/sqle/17.05/DecisionTreePredict.json +170 -0
  570. teradataml/data/jsons/sqle/17.05/GLMPredict.json +122 -0
  571. teradataml/data/jsons/sqle/17.05/MovingAverage.json +367 -0
  572. teradataml/data/jsons/sqle/17.05/NGramSplitter.json +239 -0
  573. teradataml/data/jsons/sqle/17.05/NaiveBayesPredict.json +136 -0
  574. teradataml/data/jsons/sqle/17.05/NaiveBayesTextClassifierPredict.json +235 -0
  575. teradataml/data/jsons/sqle/17.05/Pack.json +98 -0
  576. teradataml/data/jsons/sqle/17.05/SVMSparsePredict.json +162 -0
  577. teradataml/data/jsons/sqle/17.05/Sessionize.json +105 -0
  578. teradataml/data/jsons/sqle/17.05/StringSimilarity.json +86 -0
  579. teradataml/data/jsons/sqle/17.05/Unpack.json +166 -0
  580. teradataml/data/jsons/sqle/17.05/nPath.json +269 -0
  581. teradataml/data/jsons/sqle/17.10/Antiselect.json +56 -0
  582. teradataml/data/jsons/sqle/17.10/Attribution.json +249 -0
  583. teradataml/data/jsons/sqle/17.10/DecisionForestPredict.json +185 -0
  584. teradataml/data/jsons/sqle/17.10/DecisionTreePredict.json +172 -0
  585. teradataml/data/jsons/sqle/17.10/GLMPredict.json +151 -0
  586. teradataml/data/jsons/sqle/17.10/MovingAverage.json +368 -0
  587. teradataml/data/jsons/sqle/17.10/NGramSplitter.json +239 -0
  588. teradataml/data/jsons/sqle/17.10/NaiveBayesPredict.json +149 -0
  589. teradataml/data/jsons/sqle/17.10/NaiveBayesTextClassifierPredict.json +288 -0
  590. teradataml/data/jsons/sqle/17.10/Pack.json +133 -0
  591. teradataml/data/jsons/sqle/17.10/SVMSparsePredict.json +193 -0
  592. teradataml/data/jsons/sqle/17.10/Sessionize.json +105 -0
  593. teradataml/data/jsons/sqle/17.10/StringSimilarity.json +86 -0
  594. teradataml/data/jsons/sqle/17.10/TD_BinCodeFit.json +239 -0
  595. teradataml/data/jsons/sqle/17.10/TD_BinCodeTransform.json +70 -0
  596. teradataml/data/jsons/sqle/17.10/TD_CategoricalSummary.json +54 -0
  597. teradataml/data/jsons/sqle/17.10/TD_Chisq.json +68 -0
  598. teradataml/data/jsons/sqle/17.10/TD_ColumnSummary.json +54 -0
  599. teradataml/data/jsons/sqle/17.10/TD_ConvertTo.json +69 -0
  600. teradataml/data/jsons/sqle/17.10/TD_FTest.json +187 -0
  601. teradataml/data/jsons/sqle/17.10/TD_FillRowID.json +52 -0
  602. teradataml/data/jsons/sqle/17.10/TD_FunctionFit.json +46 -0
  603. teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +72 -0
  604. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithMissingValues.json +53 -0
  605. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithoutMissingValues.json +53 -0
  606. teradataml/data/jsons/sqle/17.10/TD_Histogram.json +133 -0
  607. teradataml/data/jsons/sqle/17.10/TD_NumApply.json +147 -0
  608. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingFit.json +183 -0
  609. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +66 -0
  610. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterFit.json +197 -0
  611. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +48 -0
  612. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesFit.json +114 -0
  613. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +72 -0
  614. teradataml/data/jsons/sqle/17.10/TD_QQNorm.json +112 -0
  615. teradataml/data/jsons/sqle/17.10/TD_RoundColumns.json +93 -0
  616. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeFit.json +128 -0
  617. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +71 -0
  618. teradataml/data/jsons/sqle/17.10/TD_ScaleFit.json +157 -0
  619. teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +71 -0
  620. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeFit.json +148 -0
  621. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +48 -0
  622. teradataml/data/jsons/sqle/17.10/TD_StrApply.json +240 -0
  623. teradataml/data/jsons/sqle/17.10/TD_UnivariateStatistics.json +119 -0
  624. teradataml/data/jsons/sqle/17.10/TD_WhichMax.json +53 -0
  625. teradataml/data/jsons/sqle/17.10/TD_WhichMin.json +53 -0
  626. teradataml/data/jsons/sqle/17.10/TD_ZTest.json +171 -0
  627. teradataml/data/jsons/sqle/17.10/Unpack.json +188 -0
  628. teradataml/data/jsons/sqle/17.10/nPath.json +269 -0
  629. teradataml/data/jsons/sqle/17.20/Antiselect.json +56 -0
  630. teradataml/data/jsons/sqle/17.20/Attribution.json +249 -0
  631. teradataml/data/jsons/sqle/17.20/DecisionForestPredict.json +185 -0
  632. teradataml/data/jsons/sqle/17.20/DecisionTreePredict.json +172 -0
  633. teradataml/data/jsons/sqle/17.20/GLMPredict.json +151 -0
  634. teradataml/data/jsons/sqle/17.20/MovingAverage.json +367 -0
  635. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +239 -0
  636. teradataml/data/jsons/sqle/17.20/NaiveBayesPredict.json +149 -0
  637. teradataml/data/jsons/sqle/17.20/NaiveBayesTextClassifierPredict.json +287 -0
  638. teradataml/data/jsons/sqle/17.20/Pack.json +133 -0
  639. teradataml/data/jsons/sqle/17.20/SVMSparsePredict.json +192 -0
  640. teradataml/data/jsons/sqle/17.20/Sessionize.json +105 -0
  641. teradataml/data/jsons/sqle/17.20/StringSimilarity.json +86 -0
  642. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +149 -0
  643. teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
  644. teradataml/data/jsons/sqle/17.20/TD_BinCodeFit.json +239 -0
  645. teradataml/data/jsons/sqle/17.20/TD_BinCodeTransform.json +71 -0
  646. teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
  647. teradataml/data/jsons/sqle/17.20/TD_CategoricalSummary.json +53 -0
  648. teradataml/data/jsons/sqle/17.20/TD_Chisq.json +68 -0
  649. teradataml/data/jsons/sqle/17.20/TD_ClassificationEvaluator.json +146 -0
  650. teradataml/data/jsons/sqle/17.20/TD_ColumnSummary.json +53 -0
  651. teradataml/data/jsons/sqle/17.20/TD_ColumnTransformer.json +218 -0
  652. teradataml/data/jsons/sqle/17.20/TD_ConvertTo.json +92 -0
  653. teradataml/data/jsons/sqle/17.20/TD_DecisionForest.json +260 -0
  654. teradataml/data/jsons/sqle/17.20/TD_DecisionForestPredict.json +139 -0
  655. teradataml/data/jsons/sqle/17.20/TD_FTest.json +269 -0
  656. teradataml/data/jsons/sqle/17.20/TD_FillRowID.json +52 -0
  657. teradataml/data/jsons/sqle/17.20/TD_FunctionFit.json +46 -0
  658. teradataml/data/jsons/sqle/17.20/TD_FunctionTransform.json +72 -0
  659. teradataml/data/jsons/sqle/17.20/TD_GLM.json +507 -0
  660. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +168 -0
  661. teradataml/data/jsons/sqle/17.20/TD_GLMPerSegment.json +411 -0
  662. teradataml/data/jsons/sqle/17.20/TD_GLMPredictPerSegment.json +146 -0
  663. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +93 -0
  664. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithMissingValues.json +76 -0
  665. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithoutMissingValues.json +76 -0
  666. teradataml/data/jsons/sqle/17.20/TD_Histogram.json +152 -0
  667. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +232 -0
  668. teradataml/data/jsons/sqle/17.20/TD_KMeansPredict.json +87 -0
  669. teradataml/data/jsons/sqle/17.20/TD_KNN.json +262 -0
  670. teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
  671. teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
  672. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
  673. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesTextClassifierTrainer.json +137 -0
  674. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +102 -0
  675. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineTransform.json +71 -0
  676. teradataml/data/jsons/sqle/17.20/TD_NumApply.json +147 -0
  677. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +316 -0
  678. teradataml/data/jsons/sqle/17.20/TD_OneClassSVMPredict.json +124 -0
  679. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingFit.json +271 -0
  680. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingTransform.json +65 -0
  681. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingFit.json +229 -0
  682. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingTransform.json +75 -0
  683. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterFit.json +217 -0
  684. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterTransform.json +48 -0
  685. teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
  686. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesFit.json +114 -0
  687. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesTransform.json +72 -0
  688. teradataml/data/jsons/sqle/17.20/TD_QQNorm.json +111 -0
  689. teradataml/data/jsons/sqle/17.20/TD_ROC.json +179 -0
  690. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionFit.json +179 -0
  691. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionMinComponents.json +74 -0
  692. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionTransform.json +74 -0
  693. teradataml/data/jsons/sqle/17.20/TD_RegressionEvaluator.json +138 -0
  694. teradataml/data/jsons/sqle/17.20/TD_RoundColumns.json +93 -0
  695. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeFit.json +128 -0
  696. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeTransform.json +71 -0
  697. teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
  698. teradataml/data/jsons/sqle/17.20/TD_SVM.json +389 -0
  699. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +142 -0
  700. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +310 -0
  701. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +120 -0
  702. teradataml/data/jsons/sqle/17.20/TD_SentimentExtractor.json +194 -0
  703. teradataml/data/jsons/sqle/17.20/TD_Shap.json +221 -0
  704. teradataml/data/jsons/sqle/17.20/TD_Silhouette.json +143 -0
  705. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeFit.json +147 -0
  706. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeTransform.json +48 -0
  707. teradataml/data/jsons/sqle/17.20/TD_StrApply.json +240 -0
  708. teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
  709. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingFit.json +248 -0
  710. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingTransform.json +75 -0
  711. teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
  712. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +297 -0
  713. teradataml/data/jsons/sqle/17.20/TD_TrainTestSplit.json +142 -0
  714. teradataml/data/jsons/sqle/17.20/TD_UnivariateStatistics.json +117 -0
  715. teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
  716. teradataml/data/jsons/sqle/17.20/TD_VectorDistance.json +183 -0
  717. teradataml/data/jsons/sqle/17.20/TD_WhichMax.json +53 -0
  718. teradataml/data/jsons/sqle/17.20/TD_WhichMin.json +53 -0
  719. teradataml/data/jsons/sqle/17.20/TD_WordEmbeddings.json +241 -0
  720. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +330 -0
  721. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +195 -0
  722. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +247 -0
  723. teradataml/data/jsons/sqle/17.20/Unpack.json +188 -0
  724. teradataml/data/jsons/sqle/17.20/nPath.json +269 -0
  725. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +370 -0
  726. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +460 -0
  727. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +385 -0
  728. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +369 -0
  729. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +369 -0
  730. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +369 -0
  731. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +369 -0
  732. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +400 -0
  733. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +401 -0
  734. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +384 -0
  735. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +384 -0
  736. teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
  737. teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
  738. teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
  739. teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
  740. teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
  741. teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
  742. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  743. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  744. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  745. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  746. teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
  747. teradataml/data/jsons/tableoperator/17.00/read_nos.json +198 -0
  748. teradataml/data/jsons/tableoperator/17.05/read_nos.json +198 -0
  749. teradataml/data/jsons/tableoperator/17.05/write_nos.json +195 -0
  750. teradataml/data/jsons/tableoperator/17.10/read_nos.json +184 -0
  751. teradataml/data/jsons/tableoperator/17.10/write_nos.json +195 -0
  752. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  753. teradataml/data/jsons/tableoperator/17.20/read_nos.json +183 -0
  754. teradataml/data/jsons/tableoperator/17.20/write_nos.json +224 -0
  755. teradataml/data/jsons/uaf/17.20/TD_ACF.json +132 -0
  756. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +396 -0
  757. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +77 -0
  758. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +153 -0
  759. teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
  760. teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
  761. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +107 -0
  762. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +106 -0
  763. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +89 -0
  764. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +104 -0
  765. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +78 -0
  766. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +66 -0
  767. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +87 -0
  768. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +134 -0
  769. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +144 -0
  770. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +108 -0
  771. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +108 -0
  772. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +78 -0
  773. teradataml/data/jsons/uaf/17.20/TD_DIFF.json +92 -0
  774. teradataml/data/jsons/uaf/17.20/TD_DTW.json +114 -0
  775. teradataml/data/jsons/uaf/17.20/TD_DURBIN_WATSON.json +101 -0
  776. teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
  777. teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
  778. teradataml/data/jsons/uaf/17.20/TD_EXTRACT_RESULTS.json +39 -0
  779. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +101 -0
  780. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4FORMULA.json +85 -0
  781. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4SINUSOIDS.json +71 -0
  782. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +139 -0
  783. teradataml/data/jsons/uaf/17.20/TD_HOLT_WINTERS_FORECASTER.json +313 -0
  784. teradataml/data/jsons/uaf/17.20/TD_IDFFT.json +58 -0
  785. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +81 -0
  786. teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
  787. teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
  788. teradataml/data/jsons/uaf/17.20/TD_INPUTVALIDATOR.json +64 -0
  789. teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
  790. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +182 -0
  791. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +103 -0
  792. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +181 -0
  793. teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
  794. teradataml/data/jsons/uaf/17.20/TD_MATRIXMULTIPLY.json +68 -0
  795. teradataml/data/jsons/uaf/17.20/TD_MINFO.json +67 -0
  796. teradataml/data/jsons/uaf/17.20/TD_MULTIVAR_REGR.json +179 -0
  797. teradataml/data/jsons/uaf/17.20/TD_PACF.json +114 -0
  798. teradataml/data/jsons/uaf/17.20/TD_PORTMAN.json +119 -0
  799. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +175 -0
  800. teradataml/data/jsons/uaf/17.20/TD_POWERTRANSFORM.json +98 -0
  801. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +194 -0
  802. teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
  803. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +143 -0
  804. teradataml/data/jsons/uaf/17.20/TD_SELECTION_CRITERIA.json +90 -0
  805. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_PERIODICITIES.json +80 -0
  806. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_RESIDMEAN.json +68 -0
  807. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +184 -0
  808. teradataml/data/jsons/uaf/17.20/TD_SINFO.json +58 -0
  809. teradataml/data/jsons/uaf/17.20/TD_SMOOTHMA.json +163 -0
  810. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +101 -0
  811. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +112 -0
  812. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +95 -0
  813. teradataml/data/jsons/uaf/17.20/TD_WHITES_GENERAL.json +78 -0
  814. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
  815. teradataml/data/kmeans_example.json +23 -0
  816. teradataml/data/kmeans_table.csv +10 -0
  817. teradataml/data/kmeans_us_arrests_data.csv +51 -0
  818. teradataml/data/knn_example.json +19 -0
  819. teradataml/data/knnrecommender_example.json +7 -0
  820. teradataml/data/knnrecommenderpredict_example.json +12 -0
  821. teradataml/data/lar_example.json +17 -0
  822. teradataml/data/larpredict_example.json +30 -0
  823. teradataml/data/lc_new_predictors.csv +5 -0
  824. teradataml/data/lc_new_reference.csv +9 -0
  825. teradataml/data/lda_example.json +9 -0
  826. teradataml/data/ldainference_example.json +15 -0
  827. teradataml/data/ldatopicsummary_example.json +9 -0
  828. teradataml/data/levendist_input.csv +13 -0
  829. teradataml/data/levenshteindistance_example.json +10 -0
  830. teradataml/data/linreg_example.json +10 -0
  831. teradataml/data/load_example_data.py +350 -0
  832. teradataml/data/loan_prediction.csv +295 -0
  833. teradataml/data/lungcancer.csv +138 -0
  834. teradataml/data/mappingdata.csv +12 -0
  835. teradataml/data/medical_readings.csv +101 -0
  836. teradataml/data/milk_timeseries.csv +157 -0
  837. teradataml/data/min_max_titanic.csv +4 -0
  838. teradataml/data/minhash_example.json +6 -0
  839. teradataml/data/ml_ratings.csv +7547 -0
  840. teradataml/data/ml_ratings_10.csv +2445 -0
  841. teradataml/data/mobile_data.csv +13 -0
  842. teradataml/data/model1_table.csv +5 -0
  843. teradataml/data/model2_table.csv +5 -0
  844. teradataml/data/models/License_file.txt +1 -0
  845. teradataml/data/models/License_file_empty.txt +0 -0
  846. teradataml/data/models/dataiku_iris_data_ann_thin +0 -0
  847. teradataml/data/models/dr_iris_rf +0 -0
  848. teradataml/data/models/iris_db_dt_model_sklearn.onnx +0 -0
  849. teradataml/data/models/iris_db_dt_model_sklearn_floattensor.onnx +0 -0
  850. teradataml/data/models/iris_db_glm_model.pmml +57 -0
  851. teradataml/data/models/iris_db_xgb_model.pmml +4471 -0
  852. teradataml/data/models/iris_kmeans_model +0 -0
  853. teradataml/data/models/iris_mojo_glm_h2o_model +0 -0
  854. teradataml/data/models/iris_mojo_xgb_h2o_model +0 -0
  855. teradataml/data/modularity_example.json +12 -0
  856. teradataml/data/movavg_example.json +8 -0
  857. teradataml/data/mtx1.csv +7 -0
  858. teradataml/data/mtx2.csv +13 -0
  859. teradataml/data/multi_model_classification.csv +401 -0
  860. teradataml/data/multi_model_regression.csv +401 -0
  861. teradataml/data/mvdfft8.csv +9 -0
  862. teradataml/data/naivebayes_example.json +10 -0
  863. teradataml/data/naivebayespredict_example.json +19 -0
  864. teradataml/data/naivebayestextclassifier2_example.json +7 -0
  865. teradataml/data/naivebayestextclassifier_example.json +8 -0
  866. teradataml/data/naivebayestextclassifierpredict_example.json +32 -0
  867. teradataml/data/name_Find_configure.csv +10 -0
  868. teradataml/data/namedentityfinder_example.json +14 -0
  869. teradataml/data/namedentityfinderevaluator_example.json +10 -0
  870. teradataml/data/namedentityfindertrainer_example.json +6 -0
  871. teradataml/data/nb_iris_input_test.csv +31 -0
  872. teradataml/data/nb_iris_input_train.csv +121 -0
  873. teradataml/data/nbp_iris_model.csv +13 -0
  874. teradataml/data/ner_dict.csv +8 -0
  875. teradataml/data/ner_extractor_text.csv +2 -0
  876. teradataml/data/ner_input_eng.csv +7 -0
  877. teradataml/data/ner_rule.csv +5 -0
  878. teradataml/data/ner_sports_test2.csv +29 -0
  879. teradataml/data/ner_sports_train.csv +501 -0
  880. teradataml/data/nerevaluator_example.json +6 -0
  881. teradataml/data/nerextractor_example.json +18 -0
  882. teradataml/data/nermem_sports_test.csv +18 -0
  883. teradataml/data/nermem_sports_train.csv +51 -0
  884. teradataml/data/nertrainer_example.json +7 -0
  885. teradataml/data/ngrams_example.json +7 -0
  886. teradataml/data/notebooks/__init__.py +0 -0
  887. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Aggregate Functions using SQLAlchemy.ipynb +1455 -0
  888. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Arithmetic Functions Using SQLAlchemy.ipynb +1993 -0
  889. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Bit-Byte Manipulation Functions using SQLAlchemy.ipynb +1492 -0
  890. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Built-in functions using SQLAlchemy.ipynb +536 -0
  891. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Regular Expressions Using SQLAlchemy.ipynb +570 -0
  892. teradataml/data/notebooks/sqlalchemy/Teradata Vantage String Functions Using SQLAlchemy.ipynb +2559 -0
  893. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Window Aggregate Functions using SQLAlchemy.ipynb +2911 -0
  894. teradataml/data/notebooks/sqlalchemy/Using Generic SQLAlchemy ClauseElements teradataml DataFrame assign method.ipynb +698 -0
  895. teradataml/data/notebooks/sqlalchemy/__init__.py +0 -0
  896. teradataml/data/notebooks/sqlalchemy/teradataml filtering using SQLAlchemy ClauseElements.ipynb +784 -0
  897. teradataml/data/npath_example.json +23 -0
  898. teradataml/data/ntree_example.json +14 -0
  899. teradataml/data/numeric_strings.csv +5 -0
  900. teradataml/data/numerics.csv +4 -0
  901. teradataml/data/ocean_buoy.csv +17 -0
  902. teradataml/data/ocean_buoy2.csv +17 -0
  903. teradataml/data/ocean_buoys.csv +28 -0
  904. teradataml/data/ocean_buoys2.csv +10 -0
  905. teradataml/data/ocean_buoys_nonpti.csv +28 -0
  906. teradataml/data/ocean_buoys_seq.csv +29 -0
  907. teradataml/data/onehot_encoder_train.csv +4 -0
  908. teradataml/data/openml_example.json +92 -0
  909. teradataml/data/optional_event_table.csv +4 -0
  910. teradataml/data/orders1.csv +11 -0
  911. teradataml/data/orders1_12.csv +13 -0
  912. teradataml/data/orders_ex.csv +4 -0
  913. teradataml/data/pack_example.json +9 -0
  914. teradataml/data/package_tracking.csv +19 -0
  915. teradataml/data/package_tracking_pti.csv +19 -0
  916. teradataml/data/pagerank_example.json +13 -0
  917. teradataml/data/paragraphs_input.csv +6 -0
  918. teradataml/data/pathanalyzer_example.json +8 -0
  919. teradataml/data/pathgenerator_example.json +8 -0
  920. teradataml/data/patient_profile.csv +101 -0
  921. teradataml/data/pattern_matching_data.csv +11 -0
  922. teradataml/data/payment_fraud_dataset.csv +10001 -0
  923. teradataml/data/peppers.png +0 -0
  924. teradataml/data/phrases.csv +7 -0
  925. teradataml/data/pivot_example.json +9 -0
  926. teradataml/data/pivot_input.csv +22 -0
  927. teradataml/data/playerRating.csv +31 -0
  928. teradataml/data/pos_input.csv +40 -0
  929. teradataml/data/postagger_example.json +7 -0
  930. teradataml/data/posttagger_output.csv +44 -0
  931. teradataml/data/production_data.csv +17 -0
  932. teradataml/data/production_data2.csv +7 -0
  933. teradataml/data/randomsample_example.json +32 -0
  934. teradataml/data/randomwalksample_example.json +9 -0
  935. teradataml/data/rank_table.csv +6 -0
  936. teradataml/data/real_values.csv +14 -0
  937. teradataml/data/ref_mobile_data.csv +4 -0
  938. teradataml/data/ref_mobile_data_dense.csv +2 -0
  939. teradataml/data/ref_url.csv +17 -0
  940. teradataml/data/restaurant_reviews.csv +7 -0
  941. teradataml/data/retail_churn_table.csv +27772 -0
  942. teradataml/data/river_data.csv +145 -0
  943. teradataml/data/roc_example.json +8 -0
  944. teradataml/data/roc_input.csv +101 -0
  945. teradataml/data/rule_inputs.csv +6 -0
  946. teradataml/data/rule_table.csv +2 -0
  947. teradataml/data/sales.csv +7 -0
  948. teradataml/data/sales_transaction.csv +501 -0
  949. teradataml/data/salesdata.csv +342 -0
  950. teradataml/data/sample_cities.csv +3 -0
  951. teradataml/data/sample_shapes.csv +11 -0
  952. teradataml/data/sample_streets.csv +3 -0
  953. teradataml/data/sampling_example.json +16 -0
  954. teradataml/data/sax_example.json +17 -0
  955. teradataml/data/scale_attributes.csv +3 -0
  956. teradataml/data/scale_example.json +74 -0
  957. teradataml/data/scale_housing.csv +11 -0
  958. teradataml/data/scale_housing_test.csv +6 -0
  959. teradataml/data/scale_input_part_sparse.csv +31 -0
  960. teradataml/data/scale_input_partitioned.csv +16 -0
  961. teradataml/data/scale_input_sparse.csv +11 -0
  962. teradataml/data/scale_parameters.csv +3 -0
  963. teradataml/data/scale_stat.csv +11 -0
  964. teradataml/data/scalebypartition_example.json +13 -0
  965. teradataml/data/scalemap_example.json +13 -0
  966. teradataml/data/scalesummary_example.json +12 -0
  967. teradataml/data/score_category.csv +101 -0
  968. teradataml/data/score_summary.csv +4 -0
  969. teradataml/data/script_example.json +10 -0
  970. teradataml/data/scripts/deploy_script.py +84 -0
  971. teradataml/data/scripts/lightgbm/dataset.template +175 -0
  972. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +264 -0
  973. teradataml/data/scripts/lightgbm/lightgbm_function.template +234 -0
  974. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +177 -0
  975. teradataml/data/scripts/mapper.R +20 -0
  976. teradataml/data/scripts/mapper.py +16 -0
  977. teradataml/data/scripts/mapper_replace.py +16 -0
  978. teradataml/data/scripts/sklearn/__init__.py +0 -0
  979. teradataml/data/scripts/sklearn/sklearn_fit.py +205 -0
  980. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +148 -0
  981. teradataml/data/scripts/sklearn/sklearn_function.template +144 -0
  982. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +166 -0
  983. teradataml/data/scripts/sklearn/sklearn_neighbors.py +161 -0
  984. teradataml/data/scripts/sklearn/sklearn_score.py +145 -0
  985. teradataml/data/scripts/sklearn/sklearn_transform.py +327 -0
  986. teradataml/data/sdk/modelops/modelops_spec.json +101737 -0
  987. teradataml/data/seeds.csv +10 -0
  988. teradataml/data/sentenceextractor_example.json +7 -0
  989. teradataml/data/sentiment_extract_input.csv +11 -0
  990. teradataml/data/sentiment_train.csv +16 -0
  991. teradataml/data/sentiment_word.csv +20 -0
  992. teradataml/data/sentiment_word_input.csv +20 -0
  993. teradataml/data/sentimentextractor_example.json +24 -0
  994. teradataml/data/sentimenttrainer_example.json +8 -0
  995. teradataml/data/sequence_table.csv +10 -0
  996. teradataml/data/seriessplitter_example.json +8 -0
  997. teradataml/data/sessionize_example.json +17 -0
  998. teradataml/data/sessionize_table.csv +116 -0
  999. teradataml/data/setop_test1.csv +24 -0
  1000. teradataml/data/setop_test2.csv +22 -0
  1001. teradataml/data/soc_nw_edges.csv +11 -0
  1002. teradataml/data/soc_nw_vertices.csv +8 -0
  1003. teradataml/data/souvenir_timeseries.csv +168 -0
  1004. teradataml/data/sparse_iris_attribute.csv +5 -0
  1005. teradataml/data/sparse_iris_test.csv +121 -0
  1006. teradataml/data/sparse_iris_train.csv +601 -0
  1007. teradataml/data/star1.csv +6 -0
  1008. teradataml/data/star_pivot.csv +8 -0
  1009. teradataml/data/state_transition.csv +5 -0
  1010. teradataml/data/stock_data.csv +53 -0
  1011. teradataml/data/stock_movement.csv +11 -0
  1012. teradataml/data/stock_vol.csv +76 -0
  1013. teradataml/data/stop_words.csv +8 -0
  1014. teradataml/data/store_sales.csv +37 -0
  1015. teradataml/data/stringsimilarity_example.json +8 -0
  1016. teradataml/data/strsimilarity_input.csv +13 -0
  1017. teradataml/data/students.csv +101 -0
  1018. teradataml/data/svm_iris_input_test.csv +121 -0
  1019. teradataml/data/svm_iris_input_train.csv +481 -0
  1020. teradataml/data/svm_iris_model.csv +7 -0
  1021. teradataml/data/svmdense_example.json +10 -0
  1022. teradataml/data/svmdensepredict_example.json +19 -0
  1023. teradataml/data/svmsparse_example.json +8 -0
  1024. teradataml/data/svmsparsepredict_example.json +14 -0
  1025. teradataml/data/svmsparsesummary_example.json +8 -0
  1026. teradataml/data/target_mobile_data.csv +13 -0
  1027. teradataml/data/target_mobile_data_dense.csv +5 -0
  1028. teradataml/data/target_udt_data.csv +8 -0
  1029. teradataml/data/tdnerextractor_example.json +14 -0
  1030. teradataml/data/templatedata.csv +1201 -0
  1031. teradataml/data/templates/open_source_ml.json +11 -0
  1032. teradataml/data/teradata_icon.ico +0 -0
  1033. teradataml/data/teradataml_example.json +1473 -0
  1034. teradataml/data/test_classification.csv +101 -0
  1035. teradataml/data/test_loan_prediction.csv +53 -0
  1036. teradataml/data/test_pacf_12.csv +37 -0
  1037. teradataml/data/test_prediction.csv +101 -0
  1038. teradataml/data/test_regression.csv +101 -0
  1039. teradataml/data/test_river2.csv +109 -0
  1040. teradataml/data/text_inputs.csv +6 -0
  1041. teradataml/data/textchunker_example.json +8 -0
  1042. teradataml/data/textclassifier_example.json +7 -0
  1043. teradataml/data/textclassifier_input.csv +7 -0
  1044. teradataml/data/textclassifiertrainer_example.json +7 -0
  1045. teradataml/data/textmorph_example.json +11 -0
  1046. teradataml/data/textparser_example.json +15 -0
  1047. teradataml/data/texttagger_example.json +12 -0
  1048. teradataml/data/texttokenizer_example.json +7 -0
  1049. teradataml/data/texttrainer_input.csv +11 -0
  1050. teradataml/data/tf_example.json +7 -0
  1051. teradataml/data/tfidf_example.json +14 -0
  1052. teradataml/data/tfidf_input1.csv +201 -0
  1053. teradataml/data/tfidf_train.csv +6 -0
  1054. teradataml/data/time_table1.csv +535 -0
  1055. teradataml/data/time_table2.csv +14 -0
  1056. teradataml/data/timeseriesdata.csv +1601 -0
  1057. teradataml/data/timeseriesdatasetsd4.csv +105 -0
  1058. teradataml/data/timestamp_data.csv +4 -0
  1059. teradataml/data/titanic.csv +892 -0
  1060. teradataml/data/titanic_dataset_unpivoted.csv +19 -0
  1061. teradataml/data/to_num_data.csv +4 -0
  1062. teradataml/data/tochar_data.csv +5 -0
  1063. teradataml/data/token_table.csv +696 -0
  1064. teradataml/data/train_multiclass.csv +101 -0
  1065. teradataml/data/train_regression.csv +101 -0
  1066. teradataml/data/train_regression_multiple_labels.csv +101 -0
  1067. teradataml/data/train_tracking.csv +28 -0
  1068. teradataml/data/trans_dense.csv +16 -0
  1069. teradataml/data/trans_sparse.csv +55 -0
  1070. teradataml/data/transformation_table.csv +6 -0
  1071. teradataml/data/transformation_table_new.csv +2 -0
  1072. teradataml/data/tv_spots.csv +16 -0
  1073. teradataml/data/twod_climate_data.csv +117 -0
  1074. teradataml/data/uaf_example.json +529 -0
  1075. teradataml/data/univariatestatistics_example.json +9 -0
  1076. teradataml/data/unpack_example.json +10 -0
  1077. teradataml/data/unpivot_example.json +25 -0
  1078. teradataml/data/unpivot_input.csv +8 -0
  1079. teradataml/data/url_data.csv +10 -0
  1080. teradataml/data/us_air_pass.csv +37 -0
  1081. teradataml/data/us_population.csv +624 -0
  1082. teradataml/data/us_states_shapes.csv +52 -0
  1083. teradataml/data/varmax_example.json +18 -0
  1084. teradataml/data/vectordistance_example.json +30 -0
  1085. teradataml/data/ville_climatedata.csv +121 -0
  1086. teradataml/data/ville_tempdata.csv +12 -0
  1087. teradataml/data/ville_tempdata1.csv +12 -0
  1088. teradataml/data/ville_temperature.csv +11 -0
  1089. teradataml/data/waveletTable.csv +1605 -0
  1090. teradataml/data/waveletTable2.csv +1605 -0
  1091. teradataml/data/weightedmovavg_example.json +9 -0
  1092. teradataml/data/wft_testing.csv +5 -0
  1093. teradataml/data/windowdfft.csv +16 -0
  1094. teradataml/data/wine_data.csv +1600 -0
  1095. teradataml/data/word_embed_input_table1.csv +6 -0
  1096. teradataml/data/word_embed_input_table2.csv +5 -0
  1097. teradataml/data/word_embed_model.csv +23 -0
  1098. teradataml/data/words_input.csv +13 -0
  1099. teradataml/data/xconvolve_complex_left.csv +6 -0
  1100. teradataml/data/xconvolve_complex_leftmulti.csv +6 -0
  1101. teradataml/data/xgboost_example.json +36 -0
  1102. teradataml/data/xgboostpredict_example.json +32 -0
  1103. teradataml/data/ztest_example.json +16 -0
  1104. teradataml/dataframe/__init__.py +0 -0
  1105. teradataml/dataframe/copy_to.py +2446 -0
  1106. teradataml/dataframe/data_transfer.py +2840 -0
  1107. teradataml/dataframe/dataframe.py +20908 -0
  1108. teradataml/dataframe/dataframe_utils.py +2114 -0
  1109. teradataml/dataframe/fastload.py +794 -0
  1110. teradataml/dataframe/functions.py +2110 -0
  1111. teradataml/dataframe/indexer.py +424 -0
  1112. teradataml/dataframe/row.py +160 -0
  1113. teradataml/dataframe/setop.py +1171 -0
  1114. teradataml/dataframe/sql.py +10904 -0
  1115. teradataml/dataframe/sql_function_parameters.py +440 -0
  1116. teradataml/dataframe/sql_functions.py +652 -0
  1117. teradataml/dataframe/sql_interfaces.py +220 -0
  1118. teradataml/dataframe/vantage_function_types.py +675 -0
  1119. teradataml/dataframe/window.py +694 -0
  1120. teradataml/dbutils/__init__.py +3 -0
  1121. teradataml/dbutils/dbutils.py +2871 -0
  1122. teradataml/dbutils/filemgr.py +318 -0
  1123. teradataml/gen_ai/__init__.py +2 -0
  1124. teradataml/gen_ai/convAI.py +473 -0
  1125. teradataml/geospatial/__init__.py +4 -0
  1126. teradataml/geospatial/geodataframe.py +1105 -0
  1127. teradataml/geospatial/geodataframecolumn.py +392 -0
  1128. teradataml/geospatial/geometry_types.py +926 -0
  1129. teradataml/hyperparameter_tuner/__init__.py +1 -0
  1130. teradataml/hyperparameter_tuner/optimizer.py +4115 -0
  1131. teradataml/hyperparameter_tuner/utils.py +303 -0
  1132. teradataml/lib/__init__.py +0 -0
  1133. teradataml/lib/aed_0_1.dll +0 -0
  1134. teradataml/lib/libaed_0_1.dylib +0 -0
  1135. teradataml/lib/libaed_0_1.so +0 -0
  1136. teradataml/lib/libaed_0_1_aarch64.so +0 -0
  1137. teradataml/lib/libaed_0_1_ppc64le.so +0 -0
  1138. teradataml/opensource/__init__.py +1 -0
  1139. teradataml/opensource/_base.py +1321 -0
  1140. teradataml/opensource/_class.py +464 -0
  1141. teradataml/opensource/_constants.py +61 -0
  1142. teradataml/opensource/_lightgbm.py +949 -0
  1143. teradataml/opensource/_sklearn.py +1008 -0
  1144. teradataml/opensource/_wrapper_utils.py +267 -0
  1145. teradataml/options/__init__.py +148 -0
  1146. teradataml/options/configure.py +489 -0
  1147. teradataml/options/display.py +187 -0
  1148. teradataml/plot/__init__.py +3 -0
  1149. teradataml/plot/axis.py +1427 -0
  1150. teradataml/plot/constants.py +15 -0
  1151. teradataml/plot/figure.py +431 -0
  1152. teradataml/plot/plot.py +810 -0
  1153. teradataml/plot/query_generator.py +83 -0
  1154. teradataml/plot/subplot.py +216 -0
  1155. teradataml/scriptmgmt/UserEnv.py +4273 -0
  1156. teradataml/scriptmgmt/__init__.py +3 -0
  1157. teradataml/scriptmgmt/lls_utils.py +2157 -0
  1158. teradataml/sdk/README.md +79 -0
  1159. teradataml/sdk/__init__.py +4 -0
  1160. teradataml/sdk/_auth_modes.py +422 -0
  1161. teradataml/sdk/_func_params.py +487 -0
  1162. teradataml/sdk/_json_parser.py +453 -0
  1163. teradataml/sdk/_openapi_spec_constants.py +249 -0
  1164. teradataml/sdk/_utils.py +236 -0
  1165. teradataml/sdk/api_client.py +900 -0
  1166. teradataml/sdk/constants.py +62 -0
  1167. teradataml/sdk/modelops/__init__.py +98 -0
  1168. teradataml/sdk/modelops/_client.py +409 -0
  1169. teradataml/sdk/modelops/_constants.py +304 -0
  1170. teradataml/sdk/modelops/models.py +2308 -0
  1171. teradataml/sdk/spinner.py +107 -0
  1172. teradataml/series/__init__.py +0 -0
  1173. teradataml/series/series.py +537 -0
  1174. teradataml/series/series_utils.py +71 -0
  1175. teradataml/store/__init__.py +12 -0
  1176. teradataml/store/feature_store/__init__.py +0 -0
  1177. teradataml/store/feature_store/constants.py +658 -0
  1178. teradataml/store/feature_store/feature_store.py +4814 -0
  1179. teradataml/store/feature_store/mind_map.py +639 -0
  1180. teradataml/store/feature_store/models.py +7330 -0
  1181. teradataml/store/feature_store/utils.py +390 -0
  1182. teradataml/table_operators/Apply.py +979 -0
  1183. teradataml/table_operators/Script.py +1739 -0
  1184. teradataml/table_operators/TableOperator.py +1343 -0
  1185. teradataml/table_operators/__init__.py +2 -0
  1186. teradataml/table_operators/apply_query_generator.py +262 -0
  1187. teradataml/table_operators/query_generator.py +493 -0
  1188. teradataml/table_operators/table_operator_query_generator.py +462 -0
  1189. teradataml/table_operators/table_operator_util.py +726 -0
  1190. teradataml/table_operators/templates/dataframe_apply.template +184 -0
  1191. teradataml/table_operators/templates/dataframe_map.template +176 -0
  1192. teradataml/table_operators/templates/dataframe_register.template +73 -0
  1193. teradataml/table_operators/templates/dataframe_udf.template +67 -0
  1194. teradataml/table_operators/templates/script_executor.template +170 -0
  1195. teradataml/telemetry_utils/__init__.py +0 -0
  1196. teradataml/telemetry_utils/queryband.py +53 -0
  1197. teradataml/utils/__init__.py +0 -0
  1198. teradataml/utils/docstring.py +527 -0
  1199. teradataml/utils/dtypes.py +943 -0
  1200. teradataml/utils/internal_buffer.py +122 -0
  1201. teradataml/utils/print_versions.py +206 -0
  1202. teradataml/utils/utils.py +451 -0
  1203. teradataml/utils/validators.py +3305 -0
  1204. teradataml-20.0.0.8.dist-info/METADATA +2804 -0
  1205. teradataml-20.0.0.8.dist-info/RECORD +1208 -0
  1206. teradataml-20.0.0.8.dist-info/WHEEL +5 -0
  1207. teradataml-20.0.0.8.dist-info/top_level.txt +1 -0
  1208. teradataml-20.0.0.8.dist-info/zip-safe +1 -0
@@ -0,0 +1,1384 @@
1
+ # ##################################################################
2
+ #
3
+ # Copyright 2025 Teradata. All rights reserved.
4
+ # TERADATA CONFIDENTIAL AND TRADE SECRET
5
+ #
6
+ # Primary Owner: Sweta Shaw
7
+ # Email Id: Sweta.Shaw@Teradata.com
8
+ #
9
+ # Secondary Owner: Akhil Bisht
10
+ # Email Id: AKHIL.BISHT@Teradata.com
11
+ #
12
+ # Version: 1.1
13
+ # Function Version: 1.0
14
+ # ##################################################################
15
+
16
+ # Python libraries
17
+ import numpy as np
18
+ import pandas as pd
19
+ import time
20
+ import warnings
21
+
22
+ # Teradata libraries
23
+ from teradataml import SMOTE
24
+ from teradataml.dataframe.dataframe import DataFrame
25
+ from teradataml.dataframe.copy_to import copy_to_sql
26
+ from teradataml import OutlierFilterFit, OutlierFilterTransform, FillRowId
27
+ from teradataml import RoundColumns, TeradataMlException
28
+ from teradataml import ScaleFit, ScaleTransform
29
+ from teradataml import UtilFuncs, TeradataConstants
30
+ from teradataml.dbutils.dbutils import execute_sql
31
+ from teradataml.common.garbagecollector import GarbageCollector
32
+ from teradataml.common.messages import Messages, MessageCodes
33
+ from teradataml.context.context import _get_current_databasename
34
+ from teradataml.utils.validators import _Validators
35
+ from teradataml import configure, INTEGER
36
+ from teradataml.common.constants import TeradataConstants
37
+
38
+
39
+ class _DataPreparation:
40
+
41
+ def __init__(self,
42
+ data,
43
+ target_column,
44
+ id_column,
45
+ verbose=0,
46
+ excluded_columns=None,
47
+ custom_data=None,
48
+ data_transform_dict=None,
49
+ task_type="Regression",
50
+ **kwargs):
51
+ """
52
+ DESCRIPTION:
53
+ Function initializes the data, target column and columns datatypes
54
+ for data preparation.
55
+
56
+ PARAMETERS:
57
+ data:
58
+ Required Argument.
59
+ Specifies the input teradataml Dataframe for data preparation phase.
60
+ Types: teradataml Dataframe
61
+
62
+ target_column:
63
+ Required Argument.
64
+ Specifies the name of the target column in "data".
65
+ Types: str
66
+
67
+ id_column:
68
+ Required Argument.
69
+ Specifies the name of the unique identifier column in "data".
70
+ Types: str
71
+
72
+ verbose:
73
+ Optional Argument.
74
+ Specifies the detailed execution steps based on verbose level.
75
+ Default Value: 0
76
+ Permitted Values:
77
+ * 0: prints the progress bar and leaderboard
78
+ * 1: prints the execution steps of AutoML.
79
+ * 2: prints the intermediate data between the execution of each step of AutoML.
80
+ Types: int
81
+
82
+ excluded_columns:
83
+ Required Argument.
84
+ Specifies the columns should be excluded from any processing.
85
+ Types: str or list of strings (str)
86
+
87
+ custom_data:
88
+ Optional Argument.
89
+ Specifies json object containing user customized input.
90
+ Types: json object
91
+
92
+ data_transform_dict:
93
+ Optional Argument.
94
+ Specifies the parameters for data transformation.
95
+ Types: dict
96
+
97
+ task_type:
98
+ Required Argument.
99
+ Specifies the task type for AutoML, whether to apply regresion OR classification
100
+ on the provived dataset.
101
+ Default Value: "Regression"
102
+ Permitted Values: "Regression", "Classification"
103
+ Types: str
104
+
105
+ **kwargs:
106
+ Specifies the additional arguments for data preparation. Below
107
+ are the additional arguments:
108
+ volatile:
109
+ Optional Argument.
110
+ Specifies whether to put the interim results of the
111
+ functions in a volatile table or not. When set to
112
+ True, results are stored in a volatile table,
113
+ otherwise not.
114
+ Default Value: False
115
+ Types: bool
116
+
117
+ persist:
118
+ Optional Argument.
119
+ Specifies whether to persist the interim results of the
120
+ functions in a table or not. When set to True,
121
+ results are persisted in a table; otherwise,
122
+ results are garbage collected at the end of the
123
+ session.
124
+ Default Value: False
125
+ Types: bool
126
+
127
+ seed:
128
+ Optional Argument.
129
+ Specifies the random seed for reproducibility.
130
+ Default Value: 42
131
+ Types: int
132
+
133
+ automl_phases:
134
+ Optional Argument.
135
+ Specifies the phase of AutoML to be executed.
136
+ Default Value: None
137
+ Types: str or list of str.
138
+
139
+ cluster:
140
+ Optional Argument.
141
+ Specifies whether to run data preparation for handling clustering.
142
+ Default Value: False
143
+ Types: bool
144
+
145
+ imbalance_handling_method:
146
+ Optional Argument.
147
+ Specifies which imbalance handling method to use.
148
+ Default Value: "SMOTE"
149
+ Permitted Values: "SMOTE", "ADASYN", "SMOTETomek", "NearMiss"
150
+ Types: str
151
+
152
+ enable_lasso:
153
+ Optional Argument.
154
+ Specifies whether to use lasso regression for feature selection.
155
+ By default, only RFE and PCA are used for feature selection.
156
+ Default Value: False
157
+ Types: bool
158
+
159
+ raise_errors:
160
+ Optional Argument.
161
+ Specifies whether to raise errors or warnings for
162
+ non-blocking errors. When set to True, raises errors,
163
+ otherwise raises warnings.
164
+ Default Value: False
165
+ Types: bool
166
+ RETURNS:
167
+ None
168
+
169
+ RAISES:
170
+ None
171
+
172
+ EXAMPLES:
173
+ >>> excluded_cols = ["id", "timestamp"]
174
+ >>> transform_dict = {"target_col_encode_ind": False, "classification_type": True}
175
+ >>> data_prep = _DataPreparation(data=df,
176
+ ... target_column="target",
177
+ ... id_column="id",
178
+ ... verbose=1,
179
+ ... excluded_columns=excluded_cols,
180
+ ... data_transform_dict=transform_dict,
181
+ ... task_type="Classification",
182
+ ... persist=True,
183
+ ... seed=42)
184
+ """
185
+ self.data = data
186
+ self.target_column = target_column
187
+ self.id_column = id_column
188
+ self.verbose = verbose
189
+ self.excluded_columns = excluded_columns
190
+ self.data_transform_dict = data_transform_dict
191
+ self.custom_data = custom_data
192
+ self.task_type = task_type
193
+ self.volatile = kwargs.get("volatile", False)
194
+ self.persist = kwargs.get("persist", False)
195
+ self.aml_phases = kwargs.get("automl_phases", None)
196
+ self.cluster = kwargs.get('cluster', False)
197
+ self._data_sampling_method = kwargs.get("imbalance_handling_method", "SMOTE")
198
+
199
+ # Setting default value for auto run mode
200
+ self._scale_method_reg = "STD"
201
+ self._scale_method_cls = "RANGE"
202
+ self._scale_method_clust = "STD"
203
+
204
+ self.data_types = {key: value for key, value in self.data._column_names_and_types}
205
+ self.seed = kwargs.get("seed", 42)
206
+ # np.random.seed() affects the random number generation in numpy and sklearn
207
+ # setting this changes the global state of the random number generator
208
+ # hence, setting the seed only if it is not None
209
+ if kwargs.get("seed") is not None:
210
+ np.random.seed(self.seed)
211
+
212
+ self.data_mapping = kwargs.get("data_mapping", {})
213
+ # Setting lasso feature selection flag
214
+ self.enable_lasso = kwargs.get('enable_lasso', False)
215
+ self.raise_errors = kwargs.get("raise_errors", False)
216
+
217
+ def data_preparation(self,
218
+ auto=True):
219
+ """
220
+ DESCRIPTION:
221
+ Function to perform following tasks:-
222
+ 1. Performs outlier processing and transformation on dataset.
223
+ 2. Performs feature selection using RFE, PCA, and Lasso.
224
+ 3. Performs feature scaling.
225
+
226
+ PARAMETERS:
227
+ auto:
228
+ Optional Argument.
229
+ Specifies whether to run AutoML in custom mode or auto mode.
230
+ When set to False, runs in custom mode. Otherwise, by default runs in auto mode.
231
+ Default Value: True
232
+ Types: bool
233
+
234
+ RETURNS:
235
+ tuple containing, list of feature lists, data transformation dictionary,
236
+ and data mapping dictionary.
237
+
238
+ RAISES:
239
+ None
240
+
241
+ EXAMPLES:
242
+ >>> data_prep = _DataPreparation(data=df,
243
+ ... target_column="target",
244
+ ... verbose=1,
245
+ ... data_transform_dict=transform_dict,
246
+ ... task_type="Classification")
247
+ >>> feature_lists, transform_dict, data_mapping = data_prep.data_preparation(auto=True)
248
+ """
249
+ self._display_heading(phase=2,
250
+ progress_bar=self.progress_bar,
251
+ automl_phases=self.aml_phases)
252
+ self._display_msg(msg='Data preparation started ...',
253
+ progress_bar=self.progress_bar)
254
+ # Setting user value in case of custom running mode
255
+ if not auto:
256
+ self._set_custom_scaling_method()
257
+ self._set_custom_sampling()
258
+
259
+ # Handling ouliers in dataset
260
+ self._handle_outliers(auto)
261
+ self.progress_bar.update()
262
+
263
+ # Checking for data imbalance
264
+ data_imbalance_check = False
265
+ if not self.cluster and self.task_type.lower() == "classification":
266
+ # Checking for data imbalance in dataset
267
+ imb_ratio, minority_class = self._check_data_imbalance()
268
+ # Setting data imbalance check flag
269
+ data_imbalance_check = imb_ratio <= 0.4
270
+ # Handling data imbalance if imbalance ratio is less than 0.4 and sampling method is SMOTE/ADASYN
271
+ if data_imbalance_check and self._data_sampling_method.lower() in ["smote", "adasyn"]:
272
+ res = self._data_sampling_smote(minority_class=str(minority_class),
273
+ imb_ratio=imb_ratio)
274
+ data_imbalance_check = not res
275
+
276
+ # Temporary Pulling data for feature selection
277
+ # Will change after sto
278
+ # Handling float type features before processing with feature selection and scaling
279
+ training_data = self._handle_generated_features()
280
+ self.progress_bar.update()
281
+
282
+ # Checking for data imbalance
283
+ if data_imbalance_check:
284
+ training_data = self._data_sampling(training_data)
285
+ self.progress_bar.update()
286
+
287
+ # Sorting the data based on id to
288
+ # remove any shuffling done by sampling
289
+ training_data = training_data.sort_values(by=self.id_column)
290
+
291
+ if not self.cluster:
292
+ if self.enable_lasso:
293
+ # Performing feature selection using lasso followed by scaling
294
+ self._feature_selection_Lasso(training_data)
295
+ self._scaling_features(feature_selection_mtd="lasso")
296
+ self.progress_bar.update()
297
+
298
+ # Performing feature selection using rfe followed by scaling
299
+ self._feature_selection_RFE(training_data)
300
+ self._scaling_features(feature_selection_mtd="rfe")
301
+ self.progress_bar.update()
302
+ else:
303
+ self._scaling_features(feature_selection_mtd="Non_pca")
304
+ self.progress_bar.update()
305
+
306
+ # Performing scaling followed by feature selection using pca
307
+ self._scaling_features(feature_selection_mtd="pca")
308
+ self._feature_selection_PCA()
309
+ self.progress_bar.update()
310
+
311
+ if not self.cluster:
312
+ # Return feature lists based on whether lasso selection was performed
313
+ if self.enable_lasso:
314
+ return [self.lasso_feature, self.rfe_feature, self.pca_feature], self.data_transform_dict, self.data_mapping
315
+ else:
316
+ return [self.rfe_feature, self.pca_feature], self.data_transform_dict, self.data_mapping
317
+ else:
318
+ return [self.pca_feature, self.non_pca_feature], self.data_transform_dict, self.data_mapping
319
+
320
+
321
+ def _data_sampling_smote(self,
322
+ minority_class,
323
+ imb_ratio):
324
+ """
325
+ DESCRIPTION:
326
+ Internal function to handle data imbalance in dataset using SMOTE technique.
327
+
328
+ PARAMETERS:
329
+ minority_class:
330
+ Required Argument.
331
+ Specifies the minority class for which synthetic samples need to be
332
+ generated.
333
+ Note:
334
+ * The label for minority class under response column must be numeric integer.
335
+ Types: str
336
+
337
+ imb_ratio:
338
+ Required Argument.
339
+ Specifies the imbalance ratio in the dataset.
340
+ Types: float
341
+
342
+ RETURNS:
343
+ True if SMOTE sampling is successful, False otherwise.
344
+
345
+ RAISES:
346
+ None
347
+
348
+ EXAMPLES:
349
+ >>> res = self._data_sampling_smote(minority_class="1", imb_ratio=0.3)
350
+ """
351
+ self._display_msg(msg="Handling data imbalance using {} ...".format(self._data_sampling_method),
352
+ progress_bar=self.progress_bar,
353
+ show_data=True)
354
+
355
+ # Setting n_neighbors based on minority class count
356
+ minority_rows = self.data[self.data[self.target_column] == minority_class].shape[0]
357
+ n_neighbors = max(5, min(minority_rows - 1, 99))
358
+ # Setting oversampling factor based on imbalance ratio
359
+ os_factor = 5
360
+ if imb_ratio <= 0.4 and imb_ratio > 0.3:
361
+ os_factor = 1.5
362
+ elif imb_ratio <= 0.3 and imb_ratio > 0.2:
363
+ os_factor = 2
364
+ elif imb_ratio <= 0.2 and imb_ratio > 0.1:
365
+ os_factor = 3
366
+
367
+ # Setting parameters for SMOTE function
368
+ smote_params = {
369
+ "id_column": self.id_column,
370
+ "input_columns": [col for col in self.data.columns if col not in [self.id_column, self.target_column]],
371
+ "minority_class": minority_class,
372
+ "oversampling_factor": os_factor,
373
+ "sampling_strategy": self._data_sampling_method.lower(),
374
+ "n_neighbors": n_neighbors,
375
+ "persist": True, # setting persist True to avoid parser memory error
376
+ "seed": self.seed,
377
+ "display_table_name": False
378
+ }
379
+ try:
380
+ # Running SMOTE function to generate synthetic samples for minority class
381
+ sm_data = SMOTE(data=self.data,
382
+ response_column=self.target_column,
383
+ **smote_params).result
384
+ except TeradataMlException as e:
385
+ if self.raise_errors:
386
+ raise e
387
+ else:
388
+ # give user warning and proceed with default data sampling technique
389
+ msg = f"TD_SMOTE function failed, proceeding with default data sampling technique."
390
+ warnings.warn(message=msg, stacklevel=2)
391
+ return False
392
+ # concatenating original data with smote generated data
393
+ concat_df = self.data.concat(sm_data, ignore_index=False)
394
+ # Generating column list excluding id column
395
+ cols_lst = [col for col in concat_df.columns if col != self.id_column]
396
+ # Filling new and distinct ids to original table
397
+ # Note: persist is set to True to avoid parser memory error
398
+ obj = FillRowId(data=concat_df.select(cols_lst),
399
+ row_id_column=self.id_column,
400
+ persist=True,
401
+ display_table_name=False).result
402
+ self.data = obj
403
+
404
+ # Adding smote generated data table to garbage collector
405
+ GarbageCollector._add_to_garbagecollector(sm_data._table_name)
406
+ GarbageCollector._add_to_garbagecollector(obj._table_name)
407
+
408
+ self._display_msg(msg="Completed data imbalance handling.",
409
+ progress_bar=self.progress_bar,
410
+ show_data=True)
411
+ return True
412
+
413
+ def _handle_outliers(self,
414
+ auto):
415
+ """
416
+ DESCRIPTION:
417
+ Function to handle existing outliers in dataset based on running mode.
418
+
419
+ PARAMETERS:
420
+ auto:
421
+ Required Argument.
422
+ Specifies whether to run AutoML in custom mode or auto mode.
423
+ When set to False, runs in custom mode. Otherwise runs in auto mode.
424
+ Types: bool
425
+
426
+ RETURNS:
427
+ None
428
+
429
+ RAISES:
430
+ None
431
+
432
+ EXAMPLES:
433
+ >>> self._handle_outliers(auto=True)
434
+ """
435
+ if auto:
436
+ self._outlier_processing()
437
+ else:
438
+ self._custom_outlier_processing()
439
+
440
+ def _check_data_imbalance(self,
441
+ data):
442
+ """
443
+ DESCRIPTION:
444
+ Internal function calculate and checks the imbalance in dataset
445
+ in case of classification.
446
+
447
+ PARAMETERS:
448
+ data:
449
+ Required Argument.
450
+ Specifies the input teradataml DataFrame.
451
+ Types: teradataml DataFrame
452
+
453
+ RETURNS:
454
+ None
455
+
456
+ RAISES:
457
+ None
458
+
459
+ EXAMPLES:
460
+ >>> result = self._check_data_imbalance(data=training_data)
461
+ """
462
+ pass
463
+
464
    def _data_sampling(self,
                       data):
        """
        DESCRIPTION:
            Function to handle data imbalance in the dataset using sampling
            techniques in case of classification.
            NOTE(review): placeholder body (returns None); the caller in
            data_preparation() uses the return value as the resampled
            training data, so the concrete implementation is presumably
            supplied elsewhere — TODO confirm.

        PARAMETERS:
            data:
                Required Argument.
                Specifies the input data to be resampled.
                Types: teradataml DataFrame

        RETURNS:
            None

        RAISES:
            None

        EXAMPLES:
            >>> sampled_data = self._data_sampling(data=imbalanced_data)
        """
        pass
487
+
488
    def _set_custom_sampling(self):
        """
        DESCRIPTION:
            Internal function to handle customized data sampling for an
            imbalanced dataset.
            NOTE(review): placeholder body (no-op); invoked by
            data_preparation() in custom mode, so the concrete
            implementation is presumably supplied elsewhere — TODO confirm.

        PARAMETERS:
            None

        RETURNS:
            None

        RAISES:
            None

        EXAMPLES:
            >>> self._set_custom_sampling()
        """
        pass
506
+
507
+ def _outlier_handling_techniques(self):
508
+ """
509
+ DESCRIPTION:
510
+ Function determines the handling techniques[drop rows/impute values] for outlier columns in the dataset.
511
+
512
+ PARAMETERS:
513
+ None
514
+
515
+ RETURNS:
516
+ tuple containing list of columns to drop rows and list of columns to impute.
517
+
518
+ RAISES:
519
+ None
520
+
521
+ EXAMPLES:
522
+ >>> drop_rows_cols, impute_cols = self._outlier_handling_techniques()
523
+ """
524
+ columns_to_drop_rows = []
525
+ columns_to_impute = []
526
+ # Keeping default method for outlier detection "Tukey"
527
+ outlier_method = "Tukey"
528
+
529
+ # List of columns for outlier processing.
530
+ # Excluding target column and excluded columns from outlier processing
531
+ outlier_columns = [col for col in self.data.columns if col not in self.excluded_columns +
532
+ [self.id_column, self.target_column]]
533
+
534
+ if len(outlier_columns) != 0:
535
+ # Detecting outlier percentage in each columns
536
+ outlier_percentage_df = self._outlier_detection(outlier_method, outlier_columns)
537
+
538
+ # Outlier Handling techniques
539
+ for i in outlier_percentage_df.itertuples():
540
+ # Column Name
541
+ col = i[0]
542
+ # Outlier value
543
+ value = i[1]
544
+ if self.cluster:
545
+ if value > 0.0:
546
+ columns_to_impute.append(col)
547
+ else:
548
+ # Dropping rows
549
+ if value > 0.0 and value <= 8.0 :
550
+ columns_to_drop_rows.append(col)
551
+ elif value> 8.0 and value <= 25.0:
552
+ columns_to_impute.append(col)
553
+
554
+ return columns_to_drop_rows, columns_to_impute
555
+
556
+ def _outlier_handling(self,
557
+ target_columns,
558
+ outlier_method,
559
+ replacement_value):
560
+ """
561
+ DESCRIPTION:
562
+ Function to handle outlier for target column based outlier method and replacement value.
563
+
564
+ PARAMETERS:
565
+ target_columns:
566
+ Required Argument.
567
+ Specifies the target columns required for outlier handling.
568
+ Types: str or list of strings (str)
569
+
570
+ outlier_method:
571
+ Required Argument.
572
+ Specifies the outlier method required for outlier handling.
573
+ Types: str
574
+
575
+ replacement_value:
576
+ Optional Argument.
577
+ Specifies the value required in case of outlier replacement.
578
+ Types: str, float
579
+
580
+ RETURNS:
581
+ OutlierFilterFit object.
582
+
583
+ RAISES:
584
+ None
585
+
586
+ EXAMPLES:
587
+ >>> outlier_result = self._outlier_handling(target_columns=["feature1"], outlier_method="Tukey", replacement_value="mean")
588
+ """
589
+
590
+ # Setting volatile and persist parameters for Outlier handling function
591
+ volatile, persist = self._get_generic_parameters(func_indicator='OutlierFilterIndicator',
592
+ param_name='OutlierFilterParam')
593
+
594
+ # Performing fit on dataset for outlier handling
595
+ fit_params = {
596
+ "data" : self.data,
597
+ "target_columns" : target_columns,
598
+ "outlier_method" : outlier_method,
599
+ "replacement_value" : replacement_value,
600
+ "volatile" : volatile,
601
+ "persist" : persist
602
+ }
603
+ outlier_fit_out = OutlierFilterFit(**fit_params)
604
+ # Performing transform on dataset for outlier handling
605
+ transform_params = {
606
+ "data" : self.data,
607
+ "object" : outlier_fit_out.result,
608
+ "persist" : True
609
+ }
610
+
611
+ # Disabling print if persist is True by default
612
+ if not volatile and not persist:
613
+ transform_params["display_table_name"] = False
614
+
615
+ if volatile:
616
+ transform_params["volatile"] = True
617
+ transform_params["persist"] = False
618
+ self.data = OutlierFilterTransform(**transform_params).result
619
+
620
+ if not volatile and not persist:
621
+ # Adding transformed data containing table to garbage collector
622
+ GarbageCollector._add_to_garbagecollector(self.data._table_name)
623
+
624
+ # Returning outlier fit object to store in data mapping dictionary
625
+ return outlier_fit_out
626
+
627
+ def _outlier_processing(self):
628
+ """
629
+ DESCRIPTION:
630
+ Function performs outlier processing on dataset. It identifies and handle outliers in the dataset.
631
+
632
+ PARAMETERS:
633
+ None
634
+
635
+ RETURNS:
636
+ None
637
+
638
+ RAISES:
639
+ None
640
+
641
+ EXAMPLES:
642
+ >>> self._outlier_processing()
643
+ """
644
+ self._display_msg(msg="Outlier preprocessing ...",
645
+ progress_bar=self.progress_bar,
646
+ show_data=True)
647
+ start_time = time.time()
648
+
649
+ # List of columns for dropping rows or imputing
650
+ columns_to_drop_rows, columns_to_impute = self._outlier_handling_techniques()
651
+ # Keeping default method for outlier handling "Tukey"
652
+ outlier_handling_method = "Tukey"
653
+
654
+ # Dropping rows
655
+ if len(columns_to_drop_rows) !=0:
656
+ self._display_msg(msg="Deleting rows of these columns:",
657
+ col_lst=columns_to_drop_rows,
658
+ progress_bar=self.progress_bar)
659
+ target_columns=columns_to_drop_rows
660
+ replacement_strategy = "DELETE"
661
+ fit_obj = self._outlier_handling(target_columns, outlier_handling_method, replacement_strategy)
662
+ self.data_mapping['fit_outlier_delete_output'] = fit_obj.output_data._table_name
663
+ self.data_mapping['fit_outlier_delete_result'] = self.data._table_name
664
+ self.data_mapping['outlier_filtered_data'] = self.data._table_name
665
+ self._display_msg(msg="Sample of dataset after removing outlier rows:",
666
+ data=self.data,
667
+ progress_bar=self.progress_bar)
668
+
669
+ # Imputing Median value in place of outliers
670
+ if len(columns_to_impute) != 0:
671
+ self._display_msg(msg="median inplace of outliers:",
672
+ col_lst=columns_to_impute,
673
+ progress_bar=self.progress_bar)
674
+ target_columns=columns_to_impute
675
+ replacement_strategy = "MEDIAN"
676
+ fit_obj = self._outlier_handling(target_columns, outlier_handling_method, replacement_strategy)
677
+ self.data_mapping['fit_outlier_impute_output'] = fit_obj.output_data._table_name
678
+ self.data_mapping['fit_outlier_impute_result'] = fit_obj.result._table_name
679
+ self.data_mapping['outlier_imputed_data'] = self.data._table_name
680
+ self._display_msg(msg="Sample of dataset after performing MEDIAN inplace:",
681
+ data=self.data,
682
+ progress_bar=self.progress_bar)
683
+
684
+ if len(columns_to_drop_rows) == 0 and len(columns_to_impute) == 0:
685
+ self._display_msg(msg='Analysis indicates not outlier in the dataset. No Action Taken.',
686
+ progress_bar=self.progress_bar)
687
+
688
+ end_time = time.time()
689
+ self._display_msg("Time Taken by Outlier processing: {:.2f} sec ".format(end_time - start_time),
690
+ progress_bar=self.progress_bar,
691
+ show_data=True)
692
+
693
+ def _custom_outlier_processing(self):
694
+ """
695
+ DESCRIPTION:
696
+ Function to perform outlier processing on dataset based on user input.
697
+
698
+ PARAMETERS:
699
+ None
700
+
701
+ RETURNS:
702
+ None
703
+
704
+ RAISES:
705
+ TeradataMlException, ValueError
706
+
707
+ EXAMPLES:
708
+ >>> self._custom_outlier_processing()
709
+ """
710
+ self._display_msg(msg="Starting customized outlier processing ...",
711
+ progress_bar=self.progress_bar,
712
+ show_data=True)
713
+ outlier_filter_input = self.custom_data.get("OutlierFilterIndicator", False)
714
+ # Checking user input for outlier filtering
715
+ if outlier_filter_input:
716
+ # List of columns for outlier processing.
717
+ target_columns = [col for col in self.data.columns if col not in self.excluded_columns]
718
+ # Checking user input for outlier detection method
719
+ outlier_method = self.custom_data.get("OutlierFilterMethod", None)
720
+ if outlier_method == 'PERCENTILE':
721
+ lower_percentile = self.custom_data.get("OutlierLowerPercentile", None)
722
+ upper_percentile = self.custom_data.get("OutlierUpperPercentile", None)
723
+ if lower_percentile and upper_percentile:
724
+ # Detecting outlier percentage for each columns
725
+ outlier_df = self._outlier_detection(outlier_method=outlier_method, column_list=target_columns, \
726
+ lower_percentile=lower_percentile, upper_percentile=upper_percentile)
727
+ else:
728
+ # Detecting outlier percentage for each column in case of other than percentile method
729
+ outlier_df = self._outlier_detection(outlier_method=outlier_method, column_list=target_columns)
730
+
731
+ # Checking for rows if outlier containing columns exist
732
+ if outlier_df.shape[0]:
733
+ # Checking user input list for outlier handling
734
+ outlier_transform_list = self.custom_data.get("OutlierFilterParam", None).copy()
735
+ if outlier_transform_list:
736
+ volatile = outlier_transform_list.pop("volatile", False)
737
+ persist = outlier_transform_list.pop("persist", False)
738
+ # Checking user input for outlier handling
739
+ _Validators._validate_dataframe_has_argument_columns(list(outlier_transform_list.keys()), "OutlierFilterParam",
740
+ self.data, "outlier_data")
741
+
742
+ for target_col, transform_val in outlier_transform_list.items():
743
+ # Fetching replacement value
744
+ replacement_value = transform_val["replacement_value"]
745
+ # Performing outlier handling
746
+ fit_obj = self._outlier_handling(target_col, outlier_method, replacement_value)
747
+ self.data_mapping[f'fit_{target_col}_outlier_output'] = fit_obj.output_data._table_name
748
+ self.data_mapping[f'fit_{target_col}_outlier_result'] = fit_obj.result._table_name
749
+ self.data_mapping[f'{target_col}_outlier_treated_data'] = self.data._table_name
750
+ self._display_msg(msg="Sample of dataset after performing custom outlier filtering",
751
+ data=self.data,progress_bar=self.progress_bar)
752
+ else:
753
+ self._display_msg(inline_msg="No information provided for feature transformation in outlier handling.",
754
+ progress_bar=self.progress_bar)
755
+ else:
756
+ self._display_msg(inline_msg="No oultiers found in dataset after applying the selected method.",
757
+ progress_bar=self.progress_bar)
758
+ else:
759
+ self._display_msg(inline_msg="No information provided for customized outlier processing. AutoML will proceed with default settings.",
760
+ progress_bar=self.progress_bar)
761
+ # Performing default handling for outliers
762
+ if not self.cluster:
763
+ self._outlier_processing()
764
+
765
+ # function for getting value of "K" in k folds cross validation
766
+ def _num_of_folds(self, rows=None):
767
+ """
768
+ DESCRIPTION:
769
+ Function to determine the number of folds for cross-validation
770
+ based on the number of rows in the dataset.
771
+ PARAMETERS:
772
+ rows:
773
+ Required Argument.
774
+ Specifies the number of rows in the dataset.
775
+ Types: int
776
+ RETURNS:
777
+ int, number of folds to be used for cross-validation.
778
+
779
+ RAISES:
780
+ None
781
+
782
+ EXAMPLES:
783
+ >>> folds = self._num_of_folds(rows=5000)
784
+ """
785
+ num_of_folds = lambda rows: 2 if rows > 20000 else (4 if 1000 < rows <= 20000 else 10)
786
+ return num_of_folds(rows)
787
+
788
+ def _feature_selection_PCA(self):
789
+ """
790
+ DESCRIPTION:
791
+ Function performs Principal Component Analysis (PCA) for feature selection.
792
+ It reduces the dimensionality of the dataset by identifying and retaining the most informative features.
793
+
794
+ PARAMETERS:
795
+ None
796
+
797
+ RETURNS:
798
+ None
799
+
800
+ RAISES:
801
+ None
802
+
803
+ EXAMPLES:
804
+ >>> self._feature_selection_PCA()
805
+ """
806
+ self._display_msg(msg="Dimension Reduction using pca ...",
807
+ progress_bar=self.progress_bar,
808
+ show_data=True)
809
+ # Required imports for PCA
810
+ from sklearn.decomposition import PCA
811
+
812
+ start_time = time.time()
813
+
814
+ # Temporary Pulling data for feature selection
815
+ pca_train = DataFrame.from_table(self.data_mapping['pca_train']).to_pandas()
816
+ # Drop unnecessary columns and store the result
817
+ if not self.cluster:
818
+ train_data = pca_train.drop(columns=[self.id_column, self.target_column], axis=1)
819
+ else:
820
+ train_data = pca_train.drop(columns=[self.id_column], axis=1)
821
+
822
+ # Initialize and fit PCA
823
+ pca = PCA(random_state=self.seed)
824
+ pca.fit(train_data)
825
+
826
+ # Find the number of components for PCA
827
+ variance = pca.explained_variance_ratio_
828
+ n = np.argmax(np.cumsum(variance) >= 0.95) + 1
829
+
830
+ # Create a new instance of PCA with the optimal number of components
831
+ pca = PCA(n_components=n, random_state=self.seed)
832
+
833
+ # Apply PCA on dataset
834
+ X_train_pca = pca.fit_transform(train_data)
835
+
836
+ # storing instance of PCA in data transformation dictionary
837
+ self.data_transform_dict["pca_fit_instance"] = pca
838
+ self.data_transform_dict["pca_fit_columns"] = train_data.columns.tolist()
839
+
840
+ #converting the numarray into dataframes
841
+ train_df = pd.DataFrame(X_train_pca)
842
+
843
+ #creating names for combined columns
844
+ column_name = {col: 'col_'+str(i) for i,col in enumerate(train_df.columns)}
845
+
846
+ # storing the new column names in data transformation dictionary
847
+ self.data_transform_dict['pca_new_column'] = column_name
848
+
849
+ #renaming them
850
+ train_df = train_df.rename(columns=column_name)
851
+
852
+ # adding the id column [PCA does not shuffle the dataset]
853
+ train_df = pd.concat([pca_train.reset_index(drop=True)[self.id_column],
854
+ train_df.reset_index(drop=True)], axis=1)
855
+
856
+ # merging target column with new data
857
+ if not self.cluster:
858
+ train_df[self.target_column] = pca_train[self.target_column].reset_index(drop=True)
859
+ self.pca_feature = train_df.drop(columns=[self.id_column, self.target_column],
860
+ axis=1).columns.tolist()
861
+ else:
862
+ self.pca_feature = train_df.drop(columns=[self.id_column],
863
+ axis=1).columns.tolist()
864
+
865
+ self._display_msg(msg="PCA columns:",
866
+ col_lst=self.pca_feature,
867
+ progress_bar=self.progress_bar)
868
+ end_time = time.time()
869
+ self._display_msg(msg="Total time taken by PCA: {:.2f} sec ".format( end_time - start_time),
870
+ progress_bar=self.progress_bar,
871
+ show_data=True)
872
+
873
+ # Pushing the data in database
874
+ self.copy_dataframe_to_sql(train_df, 'pca', self.persist)
875
+
876
+ def _feature_selection_RFE(self,
877
+ data=None):
878
+ """
879
+ DESCRIPTION:
880
+ Function performs Recursive Feature Elimination (RFE) for feature selection.
881
+ It identifies a subset of the most relevant features in the dataset.
882
+
883
+ PARAMETERS:
884
+ data:
885
+ Required Argument.
886
+ Specifies the input train pandas DataFrame.
887
+ Types: pandas Dataframe
888
+
889
+ RETURNS:
890
+ None
891
+
892
+ RAISES:
893
+ None
894
+
895
+ EXAMPLES:
896
+ >>> self._feature_selection_RFE(data=training_data)
897
+ """
898
+ self._display_msg(msg="Feature selection using rfe ...",
899
+ progress_bar=self.progress_bar,
900
+ show_data=True)
901
+
902
+ # Required imports for RFE
903
+ from sklearn.feature_selection import RFECV
904
+ from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier
905
+ from sklearn.model_selection import StratifiedKFold
906
+
907
+ start_time = time.time()
908
+ # Regression
909
+ is_classification = self.is_classification_type()
910
+ # Getting the value of k in k-fold cross-validation
911
+ folds = self._num_of_folds(data.shape[0])
912
+
913
+ # Suppressing warnings generated by pandas and sklearn
914
+ with warnings.catch_warnings():
915
+ warnings.filterwarnings('ignore')
916
+
917
+ # Random forest for RFE model
918
+ RFModel = RandomForestRegressor if not is_classification else RandomForestClassifier
919
+ rf = RFModel(n_estimators=100, random_state=self.seed)
920
+
921
+ # Determine the scoring metric based on the number of unique classes
922
+ score = 'r2' if not self.is_classification_type() \
923
+ else 'roc_auc' if self.data.drop_duplicate(self.target_column).size == 2 else 'f1_macro'
924
+
925
+ # # Instantiate StratifiedKFold with shuffling for classification
926
+ cv = folds if not self.is_classification_type() \
927
+ else StratifiedKFold(n_splits=folds, shuffle=False)
928
+
929
+ # Define the RFE with cross-validation
930
+ rfecv = RFECV(rf, cv=cv, scoring=score)
931
+
932
+ # Prepare data
933
+ train_data = data.drop(columns=[self.id_column, self.target_column], axis=1)
934
+ train_target = data[self.target_column]
935
+
936
+ # Fit the RFE using cv
937
+ rfecv.fit(train_data, train_target)
938
+
939
+ # Extract the features
940
+ features = train_data.columns[rfecv.support_].tolist()
941
+
942
+ self._display_msg(msg="feature selected by RFE:",
943
+ col_lst=features,
944
+ progress_bar=self.progress_bar)
945
+ features.append(self.target_column)
946
+ features.insert(0,self.id_column)
947
+
948
+ selected_rfe_df = data[features]
949
+
950
+ # storing the rfe selected features in data transformation dictionary
951
+ self.data_transform_dict['rfe_features'] = features
952
+
953
+ columns_to_rename = [col for col in selected_rfe_df.columns if col not in
954
+ [self.id_column, self.target_column]]
955
+ new_column = {col: f'r_{col}' for col in columns_to_rename}
956
+ self.excluded_columns.extend([new_column[key] for key in self.excluded_columns if key in new_column])
957
+
958
+ selected_rfe_df.rename(columns=new_column, inplace=True)
959
+
960
+ # storing the rename column list in data transformation dictionary
961
+ self.data_transform_dict['rfe_rename_column'] = columns_to_rename
962
+
963
+ end_time = time.time()
964
+ self._display_msg(msg="Total time taken by feature selection: {:.2f} sec ".format( end_time - start_time),
965
+ progress_bar=self.progress_bar,
966
+ show_data=True)
967
+ self.rfe_feature = selected_rfe_df.drop(columns=[self.id_column,self.target_column],
968
+ axis=1).columns.tolist()
969
+
970
+ # Pushing data into database
971
+ self.copy_dataframe_to_sql(selected_rfe_df, 'rfe', self.persist)
972
+
973
    def _feature_selection_Lasso(self,
                                 data=None):
        """
        DESCRIPTION:
            Function performs Lasso Regression for feature selection.
            It helps in identifying and retaining the most important features while setting less important ones to zero.

        PARAMETERS:
            data:
                Required Argument.
                Specifies the input train pandas DataFrame.
                Types: pandas Dataframe

        RETURNS:
            None

        RAISES:
            None

        EXAMPLES:
            >>> self._feature_selection_Lasso(data=training_data)
        """
        start_time = time.time()
        self._display_msg(msg="Feature selection using lasso ...",
                          progress_bar=self.progress_bar,
                          show_data=True)

        # Required imports for Lasso
        from sklearn.model_selection import GridSearchCV
        from sklearn.linear_model import Lasso
        from sklearn.linear_model import LogisticRegression
        from sklearn.model_selection import StratifiedKFold

        # Getting the value k in k-fold cross-validation
        # NOTE(review): num_folds is only used on the regression path below;
        # classification hard-codes 5 stratified splits — confirm intended.
        num_folds = self._num_of_folds(data.shape[0])

        # Prepare data: feature matrix without id/target, target series separately.
        train_features = data.drop(columns=[self.id_column,self.target_column], axis=1)
        train_target = data[self.target_column]

        # Suppressing warnings generated by pandas and sklearn
        with warnings.catch_warnings():
            warnings.filterwarnings('ignore')

            # Determine the estimator and parameters based on the type of problem.
            # NOTE(review): the classification estimator uses penalty='l2'
            # (ridge), not 'l1', despite the Lasso method name — confirm
            # this is intentional.
            if self.is_classification_type():
                # roc_auc only applies to binary targets; use macro-F1 otherwise.
                if self.data.drop_duplicate(self.target_column).size == 2:
                    scoring_metric = 'roc_auc'
                else:
                    scoring_metric = 'f1_macro'
                estimator = LogisticRegression(solver='saga', penalty='l2', multi_class='auto', random_state=self.seed)
                parameters = {'C':[0.00001,0.0001,0.001,0.01,0.05,0.1,10,100,1000], 'max_iter': [100, 500]}
            else:
                estimator = Lasso(random_state=self.seed)
                parameters = {'alpha':[0.00001,0.0001,0.001,0.01,0.05,0.1,10,100,1000], 'max_iter': [100, 500]}
                scoring_metric = "r2"

            # Classification: 5 deterministic stratified splits; regression:
            # plain fold count derived from the dataset size.
            if self.is_classification_type():
                cv = StratifiedKFold(n_splits=5, shuffle=False)
            else:
                cv = num_folds

            # Applying hyperparameter tuning and optimizing score
            hyperparameter_search = GridSearchCV(estimator, parameters, cv=cv, refit=True,
                                                 scoring=scoring_metric, verbose=0)

            # Fitting the best result from hyperparameter
            hyperparameter_search.fit(train_features, train_target)

            # Absolute coefficient magnitudes of the refit best estimator.
            feature_importance = np.abs(hyperparameter_search.best_estimator_.coef_)

            # Extracting feature using estimators whose importance > 0
            if self.is_classification_type():
                # coef_ is 2-D (classes x features) for LogisticRegression: keep
                # a feature if any class assigns it a non-zero weight.
                selected_feature_indices = np.where(np.any(feature_importance > 0, axis=0))[0]
                selected_features = np.array(train_features.columns)[selected_feature_indices]
                important_features = list(set(selected_features))
            else:
                # coef_ is 1-D for Lasso regression.
                important_features = np.array(train_features.columns)[feature_importance>0].tolist()

            self._display_msg(msg="feature selected by lasso:",
                              col_lst=important_features,
                              progress_bar=self.progress_bar)

            # Re-attach id (front) and target (back) around the kept features.
            important_features = [self.id_column] + important_features + [self.target_column]
            selected_lasso_df = data[important_features]

            # Storing the lasso selected features in data transformation dictionary
            self.data_transform_dict['lasso_features'] = important_features

            # Calculate the elapsed time
            end_time = time.time()
            self._display_msg(msg="Total time taken by feature selection: {:.2f} sec ".format( end_time - start_time),
                              progress_bar=self.progress_bar,
                              show_data=True)
            self.lasso_feature = selected_lasso_df.drop(columns=[self.id_column,self.target_column],
                                                        axis=1).columns.tolist()

            self.copy_dataframe_to_sql(selected_lasso_df, 'lasso', self.persist)
1073
+ def copy_dataframe_to_sql(self,
1074
+ data,
1075
+ prefix,
1076
+ persist):
1077
+ """
1078
+ DESCRIPTION:
1079
+ Function to copy dataframe to SQL with generated table name.
1080
+
1081
+ PARAMETERS:
1082
+ data:
1083
+ Required Argument.
1084
+ Specifies the input pandas DataFrame.
1085
+ Types: pandas Dataframe
1086
+
1087
+ prefix:
1088
+ Required Argument.
1089
+ Specifies the prefix for the table name.
1090
+ Types: str
1091
+
1092
+ persist:
1093
+ Required Argument.
1094
+ Specifies whether to persist the results of the
1095
+ function in a table or not. When set to True,
1096
+ results are persisted in a table; otherwise,
1097
+ results are garbage collected at the end of the
1098
+ session.
1099
+ Types: bool
1100
+
1101
+ RETURNS:
1102
+ None
1103
+
1104
+ RAISES:
1105
+ None
1106
+
1107
+ EXAMPLES:
1108
+ >>> self.copy_dataframe_to_sql(data=selected_df, prefix="lasso", persist=True)
1109
+ """
1110
+ # Generating table names
1111
+ train_table_name = UtilFuncs._generate_temp_table_name(prefix='{}_train'.format(prefix),
1112
+ table_type = TeradataConstants.TERADATA_TABLE,
1113
+ gc_on_quit=not persist)
1114
+ # If configure.temp_object_type="VT", _generate_temp_table_name() retruns the
1115
+ # table name in fully qualified format.
1116
+ train_table_name = UtilFuncs._extract_table_name(train_table_name)
1117
+
1118
+ # Storing the table names in the table name mapping dictionary
1119
+ self.data_mapping['{}_train'.format(prefix)] = train_table_name
1120
+
1121
+ # In the case of the VT option, the table was being persisted, so the VT condition is being checked.
1122
+ is_temporary = configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE
1123
+ # Pushing data into database
1124
+ if self.is_classification_type():
1125
+ copy_to_sql(df=data, table_name=train_table_name, temporary=is_temporary, if_exists="replace", types={f'{self.target_column}': INTEGER})
1126
+ else:
1127
+ copy_to_sql(df=data, table_name=train_table_name, if_exists="replace", temporary=is_temporary)
1128
+
1129
+ def _scaling_features_helper(self,
1130
+ data=None,
1131
+ feature_selection_mtd=None):
1132
+ """
1133
+ DESCRIPTION:
1134
+ This function selects the features on which feature scaling should be applied.
1135
+
1136
+ PARAMETERS:
1137
+ data:
1138
+ Required Argument.
1139
+ Specifies the data on which feature scaling will be applied.
1140
+ Types: teradataml Dataframe
1141
+
1142
+ feature_selection_mtd:
1143
+ Required Argument.
1144
+ Specifies the feature selection algorithm used.
1145
+ Types: str
1146
+
1147
+ RETURNS:
1148
+ scl_col:
1149
+ list containing, the scaled columns.
1150
+
1151
+ RAISES:
1152
+ None
1153
+
1154
+ EXAMPLES:
1155
+ >>> scaled_cols = self._scaling_features_helper(data=training_data, feature_selection_mtd="lasso")
1156
+ """
1157
+ columns_to_scale = []
1158
+
1159
+ # Iterating over the columns
1160
+ for col in data.columns:
1161
+ # Selecting columns that will be scaled
1162
+ # Exculding target_col and columns with single value
1163
+ if col not in [self.id_column, self.target_column] and \
1164
+ data.drop_duplicate(col).size > 1:
1165
+ columns_to_scale.append(col)
1166
+
1167
+ if feature_selection_mtd == "lasso":
1168
+ self.lasso_feature = columns_to_scale
1169
+ elif feature_selection_mtd == "rfe":
1170
+ self.rfe_feature = columns_to_scale
1171
+ elif feature_selection_mtd == "pca":
1172
+ self.pca_feature = columns_to_scale
1173
+ elif feature_selection_mtd == "raw_scaled":
1174
+ self.raw_scaled_feature = columns_to_scale
1175
+ else:
1176
+ self.non_pca_feature = columns_to_scale
1177
+
1178
+ columns_to_scale = [col for col in columns_to_scale if col not in self.excluded_columns]
1179
+ return columns_to_scale
1180
+
1181
    def _scaling_features(self,
                          feature_selection_mtd=None):
        """
        DESCRIPTION:
            Function performs feature scaling on columns present inside the dataset
            using scaling methods [RANGE/ABS/STD/USTD/MEAN/MIDRANGE/RESCALE].

        PARAMETERS:
            feature_selection_mtd:
                Required Argument.
                Specifies the feature selection algorithm used.
                Types: str

        RETURNS:
            None

        RAISES:
            None

        EXAMPLES:
            >>> self._scaling_features(feature_selection_mtd="lasso")
        """

        feature_selection_mtd = feature_selection_mtd.lower()
        self._display_msg(msg="Scaling Features of {} data ...".format(feature_selection_mtd),
                          progress_bar=self.progress_bar,
                          show_data=True)

        start_time = time.time()
        data_to_scale = None

        # Pick the scale method configured for the problem type:
        # classification / regression / clustering.
        if not self.cluster:
            if self.is_classification_type():
                scale_method = self._scale_method_cls
            else:
                scale_method = self._scale_method_reg
        else:
            scale_method = self._scale_method_clust

        # Loading data for feature scaling based of feature selection method;
        # anything other than rfe/lasso/raw_scaled scales self.data directly.
        if feature_selection_mtd == 'rfe':
            data_to_scale = DataFrame(self.data_mapping['rfe_train'])
        elif feature_selection_mtd == 'lasso':
            data_to_scale = DataFrame(self.data_mapping['lasso_train'])
        elif feature_selection_mtd == 'raw_scaled':
            data_to_scale = DataFrame(self.data_mapping['raw_scaled_train'])
        else:
            data_to_scale = self.data

        # Setting volatile and persist parameters for ScaleFit and ScaleTransform functions
        volatile, persist = self._get_generic_parameters(func_indicator='FeatureScalingIndicator',
                                                         param_name='FeatureScalingParam')

        # List of columns that will be scaled
        scale_col= self._scaling_features_helper(data_to_scale, feature_selection_mtd)

        if len(scale_col) != 0:
            self._display_msg(msg="columns that will be scaled: ",
                              col_lst=scale_col,
                              progress_bar=self.progress_bar)

            # Scale Fit: learn the scaling statistics for the chosen columns.
            fit_obj = ScaleFit(data=data_to_scale,
                               target_columns=scale_col,
                               scale_method=scale_method,
                               volatile=volatile,
                               persist=persist)

            # Record the fit tables for later reuse/inspection.
            self.data_mapping[f'fit_scale_{feature_selection_mtd}_output'] = fit_obj.output_data._table_name
            self.data_mapping[f'fit_scale_{feature_selection_mtd}_result'] = fit_obj.output._table_name

            # storing the scale fit object and columns in data transformation dictionary
            self.data_transform_dict['{}_scale_fit_obj'.format(feature_selection_mtd)] = fit_obj.output
            self.data_transform_dict['{}_scale_col'.format(feature_selection_mtd)] = scale_col

            # List of columns to copy to the output generated by scale transform
            accumulate_cols = list(set(data_to_scale.columns) - set(scale_col))

            # Scaling dataset
            transform_obj = ScaleTransform(data=data_to_scale,
                                           object=fit_obj,
                                           accumulate=accumulate_cols)
            scaled_df = transform_obj.result

            self._display_msg(msg="Dataset sample after scaling:",
                              data=scaled_df,
                              progress_bar=self.progress_bar)
        else:
            # No columns to scale, Original data will be used
            scaled_df = data_to_scale
            self._display_msg(msg="No columns to scale.",
                              progress_bar=self.progress_bar)

        # Persist the (possibly unchanged) dataset under "<method>_train".
        self.copy_dataframe_to_sql(scaled_df, feature_selection_mtd, persist)

        # For clustering, also refresh the mapping with the scaled table name.
        if self.cluster and feature_selection_mtd == "non_pca":
            self.data_mapping["non_pca_train"] = scaled_df._table_name
        elif self.cluster and feature_selection_mtd == "raw_scaled":
            self.data_mapping["raw_scaled_train"] = scaled_df._table_name

        end_time = time.time()
        self._display_msg(msg="Total time taken by feature scaling: {:.2f} sec".format( end_time - start_time),
                          progress_bar=self.progress_bar,
                          show_data=True)
1286
+ def _set_custom_scaling_method(self):
1287
+ """
1288
+ DESCRIPTION:
1289
+ Function to perform feature scaling based on user input.
1290
+
1291
+ PARAMETERS:
1292
+ None
1293
+
1294
+ RETURNS:
1295
+ None
1296
+
1297
+ RAISES:
1298
+ None
1299
+
1300
+ EXAMPLES:
1301
+ >>> self._set_custom_scaling_method()
1302
+ """
1303
+ # Fetching user input for performing customized scaling
1304
+ feature_scaling_input = self.custom_data.get("FeatureScalingIndicator", False)
1305
+ # Checking user input for feature scaling
1306
+ if feature_scaling_input:
1307
+ # Extracting scaling method
1308
+ custom_scaling_params = self.custom_data.get("FeatureScalingParam", None)
1309
+ if custom_scaling_params:
1310
+ custom_scaling_method = custom_scaling_params.get("FeatureScalingMethod", None)
1311
+ if custom_scaling_method is None:
1312
+ self._display_msg(inline_msg="No information provided for customized scaling method. AutoML will continue with default option.",
1313
+ progress_bar=self.progress_bar)
1314
+ else:
1315
+ if self.cluster:
1316
+ self._scale_method_cluster = custom_scaling_method
1317
+ elif self.is_classification_type():
1318
+ self._scale_method_cls = custom_scaling_method
1319
+ else:
1320
+ self._scale_method_reg = custom_scaling_method
1321
+ else:
1322
+ self._display_msg(inline_msg="No information provided for performing customized feature scaling. Proceeding with default option.",
1323
+ progress_bar=self.progress_bar)
1324
+
1325
+
1326
+ def _handle_generated_features(self):
1327
+ """
1328
+ DESCRIPTION:
1329
+ Function to handle newly generated float features. It will round them upto 4 digit after decimal point.
1330
+
1331
+ PARAMETERS:
1332
+ None
1333
+
1334
+ RETURNS:
1335
+ Pandas DataFrame containing, rounded up float columns.
1336
+
1337
+ RAISES:
1338
+ None
1339
+
1340
+ EXAMPLES:
1341
+ >>> processed_data = self._handle_generated_features()
1342
+ """
1343
+ # Assigning data to target dataframe
1344
+ target_df = self.data
1345
+ # Detecting list of float columns on target dataset
1346
+ float_columns =[col for col, d_type in target_df._column_names_and_types if d_type in ["float", "decimal.Decimal"]]
1347
+
1348
+ if len(float_columns) == 0:
1349
+ cols = target_df.columns
1350
+ # Doing reset index to get index column
1351
+ df = target_df.to_pandas().reset_index()
1352
+
1353
+ # Returning the dataframe with cols
1354
+ # to avoid extra columns generated by reset_index()
1355
+ return df[cols]
1356
+ # storing the column details for round up in data transformation dictionary
1357
+ self.data_transform_dict["round_columns"] = float_columns
1358
+ # Extracting accumulate columns
1359
+ accumulate_columns = self._extract_list(target_df.columns,float_columns)
1360
+ # Performing rounding up on target column upto 4 precision digit
1361
+ fit_params = {
1362
+ "data" : target_df,
1363
+ "target_columns" : float_columns,
1364
+ "precision_digit" : 4,
1365
+ "accumulate" : accumulate_columns,
1366
+ "persist" : True}
1367
+
1368
+ # Disabling print if persist is True by default
1369
+ if not self.volatile and not self.persist:
1370
+ fit_params["display_table_name"] = False
1371
+
1372
+ if self.volatile:
1373
+ fit_params["volatile"] = True
1374
+ fit_params["persist"] = False
1375
+
1376
+ transform_output = RoundColumns(**fit_params).result
1377
+ self.data_mapping['round_columns_data'] = transform_output._table_name
1378
+ if not self.volatile and not self.persist:
1379
+ # Adding transformed data containing table to garbage collector
1380
+ GarbageCollector._add_to_garbagecollector(transform_output._table_name)
1381
+ cols = transform_output.columns
1382
+ df = transform_output.to_pandas().reset_index()
1383
+ df = df[cols]
1384
+ return df