teradataml 20.0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1208) hide show
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +2762 -0
  4. teradataml/__init__.py +78 -0
  5. teradataml/_version.py +11 -0
  6. teradataml/analytics/Transformations.py +2996 -0
  7. teradataml/analytics/__init__.py +82 -0
  8. teradataml/analytics/analytic_function_executor.py +2416 -0
  9. teradataml/analytics/analytic_query_generator.py +1050 -0
  10. teradataml/analytics/byom/H2OPredict.py +514 -0
  11. teradataml/analytics/byom/PMMLPredict.py +437 -0
  12. teradataml/analytics/byom/__init__.py +16 -0
  13. teradataml/analytics/json_parser/__init__.py +133 -0
  14. teradataml/analytics/json_parser/analytic_functions_argument.py +1805 -0
  15. teradataml/analytics/json_parser/json_store.py +191 -0
  16. teradataml/analytics/json_parser/metadata.py +1666 -0
  17. teradataml/analytics/json_parser/utils.py +805 -0
  18. teradataml/analytics/meta_class.py +236 -0
  19. teradataml/analytics/sqle/DecisionTreePredict.py +456 -0
  20. teradataml/analytics/sqle/NaiveBayesPredict.py +420 -0
  21. teradataml/analytics/sqle/__init__.py +128 -0
  22. teradataml/analytics/sqle/json/decisiontreepredict_sqle.json +78 -0
  23. teradataml/analytics/sqle/json/naivebayespredict_sqle.json +62 -0
  24. teradataml/analytics/table_operator/__init__.py +11 -0
  25. teradataml/analytics/uaf/__init__.py +82 -0
  26. teradataml/analytics/utils.py +828 -0
  27. teradataml/analytics/valib.py +1617 -0
  28. teradataml/automl/__init__.py +5835 -0
  29. teradataml/automl/autodataprep/__init__.py +493 -0
  30. teradataml/automl/custom_json_utils.py +1625 -0
  31. teradataml/automl/data_preparation.py +1384 -0
  32. teradataml/automl/data_transformation.py +1254 -0
  33. teradataml/automl/feature_engineering.py +2273 -0
  34. teradataml/automl/feature_exploration.py +1873 -0
  35. teradataml/automl/model_evaluation.py +488 -0
  36. teradataml/automl/model_training.py +1407 -0
  37. teradataml/catalog/__init__.py +2 -0
  38. teradataml/catalog/byom.py +1759 -0
  39. teradataml/catalog/function_argument_mapper.py +859 -0
  40. teradataml/catalog/model_cataloging_utils.py +491 -0
  41. teradataml/clients/__init__.py +0 -0
  42. teradataml/clients/auth_client.py +137 -0
  43. teradataml/clients/keycloak_client.py +165 -0
  44. teradataml/clients/pkce_client.py +481 -0
  45. teradataml/common/__init__.py +1 -0
  46. teradataml/common/aed_utils.py +2078 -0
  47. teradataml/common/bulk_exposed_utils.py +113 -0
  48. teradataml/common/constants.py +1669 -0
  49. teradataml/common/deprecations.py +166 -0
  50. teradataml/common/exceptions.py +147 -0
  51. teradataml/common/formula.py +743 -0
  52. teradataml/common/garbagecollector.py +666 -0
  53. teradataml/common/logger.py +1261 -0
  54. teradataml/common/messagecodes.py +518 -0
  55. teradataml/common/messages.py +262 -0
  56. teradataml/common/pylogger.py +67 -0
  57. teradataml/common/sqlbundle.py +764 -0
  58. teradataml/common/td_coltype_code_to_tdtype.py +48 -0
  59. teradataml/common/utils.py +3166 -0
  60. teradataml/common/warnings.py +36 -0
  61. teradataml/common/wrapper_utils.py +625 -0
  62. teradataml/config/__init__.py +0 -0
  63. teradataml/config/dummy_file1.cfg +5 -0
  64. teradataml/config/dummy_file2.cfg +3 -0
  65. teradataml/config/sqlengine_alias_definitions_v1.0 +14 -0
  66. teradataml/config/sqlengine_alias_definitions_v1.1 +20 -0
  67. teradataml/config/sqlengine_alias_definitions_v1.3 +19 -0
  68. teradataml/context/__init__.py +0 -0
  69. teradataml/context/aed_context.py +223 -0
  70. teradataml/context/context.py +1462 -0
  71. teradataml/data/A_loan.csv +19 -0
  72. teradataml/data/BINARY_REALS_LEFT.csv +11 -0
  73. teradataml/data/BINARY_REALS_RIGHT.csv +11 -0
  74. teradataml/data/B_loan.csv +49 -0
  75. teradataml/data/BuoyData2.csv +17 -0
  76. teradataml/data/CONVOLVE2_COMPLEX_LEFT.csv +5 -0
  77. teradataml/data/CONVOLVE2_COMPLEX_RIGHT.csv +5 -0
  78. teradataml/data/Convolve2RealsLeft.csv +5 -0
  79. teradataml/data/Convolve2RealsRight.csv +5 -0
  80. teradataml/data/Convolve2ValidLeft.csv +11 -0
  81. teradataml/data/Convolve2ValidRight.csv +11 -0
  82. teradataml/data/DFFTConv_Real_8_8.csv +65 -0
  83. teradataml/data/Employee.csv +5 -0
  84. teradataml/data/Employee_Address.csv +4 -0
  85. teradataml/data/Employee_roles.csv +5 -0
  86. teradataml/data/JulesBelvezeDummyData.csv +100 -0
  87. teradataml/data/Mall_customer_data.csv +201 -0
  88. teradataml/data/Orders1_12mf.csv +25 -0
  89. teradataml/data/Pi_loan.csv +7 -0
  90. teradataml/data/SMOOTHED_DATA.csv +7 -0
  91. teradataml/data/TestDFFT8.csv +9 -0
  92. teradataml/data/TestRiver.csv +109 -0
  93. teradataml/data/Traindata.csv +28 -0
  94. teradataml/data/__init__.py +0 -0
  95. teradataml/data/acf.csv +17 -0
  96. teradataml/data/adaboost_example.json +34 -0
  97. teradataml/data/adaboostpredict_example.json +24 -0
  98. teradataml/data/additional_table.csv +11 -0
  99. teradataml/data/admissions_test.csv +21 -0
  100. teradataml/data/admissions_train.csv +41 -0
  101. teradataml/data/admissions_train_nulls.csv +41 -0
  102. teradataml/data/advertising.csv +201 -0
  103. teradataml/data/ageandheight.csv +13 -0
  104. teradataml/data/ageandpressure.csv +31 -0
  105. teradataml/data/amazon_reviews_25.csv +26 -0
  106. teradataml/data/antiselect_example.json +36 -0
  107. teradataml/data/antiselect_input.csv +8 -0
  108. teradataml/data/antiselect_input_mixed_case.csv +8 -0
  109. teradataml/data/applicant_external.csv +7 -0
  110. teradataml/data/applicant_reference.csv +7 -0
  111. teradataml/data/apriori_example.json +22 -0
  112. teradataml/data/arima_example.json +9 -0
  113. teradataml/data/assortedtext_input.csv +8 -0
  114. teradataml/data/attribution_example.json +34 -0
  115. teradataml/data/attribution_sample_table.csv +27 -0
  116. teradataml/data/attribution_sample_table1.csv +6 -0
  117. teradataml/data/attribution_sample_table2.csv +11 -0
  118. teradataml/data/bank_churn.csv +10001 -0
  119. teradataml/data/bank_marketing.csv +11163 -0
  120. teradataml/data/bank_web_clicks1.csv +43 -0
  121. teradataml/data/bank_web_clicks2.csv +91 -0
  122. teradataml/data/bank_web_url.csv +85 -0
  123. teradataml/data/barrier.csv +2 -0
  124. teradataml/data/barrier_new.csv +3 -0
  125. teradataml/data/betweenness_example.json +14 -0
  126. teradataml/data/bike_sharing.csv +732 -0
  127. teradataml/data/bin_breaks.csv +8 -0
  128. teradataml/data/bin_fit_ip.csv +4 -0
  129. teradataml/data/binary_complex_left.csv +11 -0
  130. teradataml/data/binary_complex_right.csv +11 -0
  131. teradataml/data/binary_matrix_complex_left.csv +21 -0
  132. teradataml/data/binary_matrix_complex_right.csv +21 -0
  133. teradataml/data/binary_matrix_real_left.csv +21 -0
  134. teradataml/data/binary_matrix_real_right.csv +21 -0
  135. teradataml/data/blood2ageandweight.csv +26 -0
  136. teradataml/data/bmi.csv +501 -0
  137. teradataml/data/boston.csv +507 -0
  138. teradataml/data/boston2cols.csv +721 -0
  139. teradataml/data/breast_cancer.csv +570 -0
  140. teradataml/data/buoydata_mix.csv +11 -0
  141. teradataml/data/burst_data.csv +5 -0
  142. teradataml/data/burst_example.json +21 -0
  143. teradataml/data/byom_example.json +34 -0
  144. teradataml/data/bytes_table.csv +4 -0
  145. teradataml/data/cal_housing_ex_raw.csv +70 -0
  146. teradataml/data/callers.csv +7 -0
  147. teradataml/data/calls.csv +10 -0
  148. teradataml/data/cars_hist.csv +33 -0
  149. teradataml/data/cat_table.csv +25 -0
  150. teradataml/data/ccm_example.json +32 -0
  151. teradataml/data/ccm_input.csv +91 -0
  152. teradataml/data/ccm_input2.csv +13 -0
  153. teradataml/data/ccmexample.csv +101 -0
  154. teradataml/data/ccmprepare_example.json +9 -0
  155. teradataml/data/ccmprepare_input.csv +91 -0
  156. teradataml/data/cfilter_example.json +12 -0
  157. teradataml/data/changepointdetection_example.json +18 -0
  158. teradataml/data/changepointdetectionrt_example.json +8 -0
  159. teradataml/data/chi_sq.csv +3 -0
  160. teradataml/data/churn_data.csv +14 -0
  161. teradataml/data/churn_emission.csv +35 -0
  162. teradataml/data/churn_initial.csv +3 -0
  163. teradataml/data/churn_state_transition.csv +5 -0
  164. teradataml/data/citedges_2.csv +745 -0
  165. teradataml/data/citvertices_2.csv +1210 -0
  166. teradataml/data/clicks2.csv +16 -0
  167. teradataml/data/clickstream.csv +13 -0
  168. teradataml/data/clickstream1.csv +11 -0
  169. teradataml/data/closeness_example.json +16 -0
  170. teradataml/data/complaints.csv +21 -0
  171. teradataml/data/complaints_mini.csv +3 -0
  172. teradataml/data/complaints_test_tokenized.csv +353 -0
  173. teradataml/data/complaints_testtoken.csv +224 -0
  174. teradataml/data/complaints_tokens_model.csv +348 -0
  175. teradataml/data/complaints_tokens_test.csv +353 -0
  176. teradataml/data/complaints_traintoken.csv +472 -0
  177. teradataml/data/computers_category.csv +1001 -0
  178. teradataml/data/computers_test1.csv +1252 -0
  179. teradataml/data/computers_train1.csv +5009 -0
  180. teradataml/data/computers_train1_clustered.csv +5009 -0
  181. teradataml/data/confusionmatrix_example.json +9 -0
  182. teradataml/data/conversion_event_table.csv +3 -0
  183. teradataml/data/corr_input.csv +17 -0
  184. teradataml/data/correlation_example.json +11 -0
  185. teradataml/data/covid_confirm_sd.csv +83 -0
  186. teradataml/data/coxhazardratio_example.json +39 -0
  187. teradataml/data/coxph_example.json +15 -0
  188. teradataml/data/coxsurvival_example.json +28 -0
  189. teradataml/data/cpt.csv +41 -0
  190. teradataml/data/credit_ex_merged.csv +45 -0
  191. teradataml/data/creditcard_data.csv +1001 -0
  192. teradataml/data/customer_loyalty.csv +301 -0
  193. teradataml/data/customer_loyalty_newseq.csv +31 -0
  194. teradataml/data/customer_segmentation_test.csv +2628 -0
  195. teradataml/data/customer_segmentation_train.csv +8069 -0
  196. teradataml/data/dataframe_example.json +173 -0
  197. teradataml/data/decisionforest_example.json +37 -0
  198. teradataml/data/decisionforestpredict_example.json +38 -0
  199. teradataml/data/decisiontree_example.json +21 -0
  200. teradataml/data/decisiontreepredict_example.json +45 -0
  201. teradataml/data/dfft2_size4_real.csv +17 -0
  202. teradataml/data/dfft2_test_matrix16.csv +17 -0
  203. teradataml/data/dfft2conv_real_4_4.csv +65 -0
  204. teradataml/data/diabetes.csv +443 -0
  205. teradataml/data/diabetes_test.csv +89 -0
  206. teradataml/data/dict_table.csv +5 -0
  207. teradataml/data/docperterm_table.csv +4 -0
  208. teradataml/data/docs/__init__.py +1 -0
  209. teradataml/data/docs/byom/__init__.py +0 -0
  210. teradataml/data/docs/byom/docs/DataRobotPredict.py +180 -0
  211. teradataml/data/docs/byom/docs/DataikuPredict.py +217 -0
  212. teradataml/data/docs/byom/docs/H2OPredict.py +325 -0
  213. teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
  214. teradataml/data/docs/byom/docs/ONNXPredict.py +283 -0
  215. teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
  216. teradataml/data/docs/byom/docs/PMMLPredict.py +278 -0
  217. teradataml/data/docs/byom/docs/__init__.py +0 -0
  218. teradataml/data/docs/sqle/__init__.py +0 -0
  219. teradataml/data/docs/sqle/docs_17_10/Antiselect.py +83 -0
  220. teradataml/data/docs/sqle/docs_17_10/Attribution.py +200 -0
  221. teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +172 -0
  222. teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +131 -0
  223. teradataml/data/docs/sqle/docs_17_10/CategoricalSummary.py +86 -0
  224. teradataml/data/docs/sqle/docs_17_10/ChiSq.py +90 -0
  225. teradataml/data/docs/sqle/docs_17_10/ColumnSummary.py +86 -0
  226. teradataml/data/docs/sqle/docs_17_10/ConvertTo.py +96 -0
  227. teradataml/data/docs/sqle/docs_17_10/DecisionForestPredict.py +139 -0
  228. teradataml/data/docs/sqle/docs_17_10/DecisionTreePredict.py +152 -0
  229. teradataml/data/docs/sqle/docs_17_10/FTest.py +161 -0
  230. teradataml/data/docs/sqle/docs_17_10/FillRowId.py +83 -0
  231. teradataml/data/docs/sqle/docs_17_10/Fit.py +88 -0
  232. teradataml/data/docs/sqle/docs_17_10/GLMPredict.py +144 -0
  233. teradataml/data/docs/sqle/docs_17_10/GetRowsWithMissingValues.py +85 -0
  234. teradataml/data/docs/sqle/docs_17_10/GetRowsWithoutMissingValues.py +82 -0
  235. teradataml/data/docs/sqle/docs_17_10/Histogram.py +165 -0
  236. teradataml/data/docs/sqle/docs_17_10/MovingAverage.py +134 -0
  237. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +209 -0
  238. teradataml/data/docs/sqle/docs_17_10/NPath.py +266 -0
  239. teradataml/data/docs/sqle/docs_17_10/NaiveBayesPredict.py +116 -0
  240. teradataml/data/docs/sqle/docs_17_10/NaiveBayesTextClassifierPredict.py +176 -0
  241. teradataml/data/docs/sqle/docs_17_10/NumApply.py +147 -0
  242. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +135 -0
  243. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +109 -0
  244. teradataml/data/docs/sqle/docs_17_10/OutlierFilterFit.py +166 -0
  245. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +105 -0
  246. teradataml/data/docs/sqle/docs_17_10/Pack.py +128 -0
  247. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesFit.py +112 -0
  248. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +102 -0
  249. teradataml/data/docs/sqle/docs_17_10/QQNorm.py +105 -0
  250. teradataml/data/docs/sqle/docs_17_10/RoundColumns.py +110 -0
  251. teradataml/data/docs/sqle/docs_17_10/RowNormalizeFit.py +118 -0
  252. teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +99 -0
  253. teradataml/data/docs/sqle/docs_17_10/SVMSparsePredict.py +153 -0
  254. teradataml/data/docs/sqle/docs_17_10/ScaleFit.py +197 -0
  255. teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +99 -0
  256. teradataml/data/docs/sqle/docs_17_10/Sessionize.py +114 -0
  257. teradataml/data/docs/sqle/docs_17_10/SimpleImputeFit.py +116 -0
  258. teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +98 -0
  259. teradataml/data/docs/sqle/docs_17_10/StrApply.py +187 -0
  260. teradataml/data/docs/sqle/docs_17_10/StringSimilarity.py +146 -0
  261. teradataml/data/docs/sqle/docs_17_10/Transform.py +105 -0
  262. teradataml/data/docs/sqle/docs_17_10/UnivariateStatistics.py +142 -0
  263. teradataml/data/docs/sqle/docs_17_10/Unpack.py +214 -0
  264. teradataml/data/docs/sqle/docs_17_10/WhichMax.py +83 -0
  265. teradataml/data/docs/sqle/docs_17_10/WhichMin.py +83 -0
  266. teradataml/data/docs/sqle/docs_17_10/ZTest.py +155 -0
  267. teradataml/data/docs/sqle/docs_17_10/__init__.py +0 -0
  268. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +186 -0
  269. teradataml/data/docs/sqle/docs_17_20/Antiselect.py +83 -0
  270. teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
  271. teradataml/data/docs/sqle/docs_17_20/Attribution.py +201 -0
  272. teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +172 -0
  273. teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +139 -0
  274. teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
  275. teradataml/data/docs/sqle/docs_17_20/CategoricalSummary.py +86 -0
  276. teradataml/data/docs/sqle/docs_17_20/ChiSq.py +90 -0
  277. teradataml/data/docs/sqle/docs_17_20/ClassificationEvaluator.py +166 -0
  278. teradataml/data/docs/sqle/docs_17_20/ColumnSummary.py +86 -0
  279. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +246 -0
  280. teradataml/data/docs/sqle/docs_17_20/ConvertTo.py +113 -0
  281. teradataml/data/docs/sqle/docs_17_20/DecisionForest.py +280 -0
  282. teradataml/data/docs/sqle/docs_17_20/DecisionForestPredict.py +144 -0
  283. teradataml/data/docs/sqle/docs_17_20/DecisionTreePredict.py +136 -0
  284. teradataml/data/docs/sqle/docs_17_20/FTest.py +240 -0
  285. teradataml/data/docs/sqle/docs_17_20/FillRowId.py +83 -0
  286. teradataml/data/docs/sqle/docs_17_20/Fit.py +88 -0
  287. teradataml/data/docs/sqle/docs_17_20/GLM.py +541 -0
  288. teradataml/data/docs/sqle/docs_17_20/GLMPerSegment.py +415 -0
  289. teradataml/data/docs/sqle/docs_17_20/GLMPredict.py +144 -0
  290. teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +233 -0
  291. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +125 -0
  292. teradataml/data/docs/sqle/docs_17_20/GetRowsWithMissingValues.py +109 -0
  293. teradataml/data/docs/sqle/docs_17_20/GetRowsWithoutMissingValues.py +106 -0
  294. teradataml/data/docs/sqle/docs_17_20/Histogram.py +224 -0
  295. teradataml/data/docs/sqle/docs_17_20/KMeans.py +251 -0
  296. teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +144 -0
  297. teradataml/data/docs/sqle/docs_17_20/KNN.py +215 -0
  298. teradataml/data/docs/sqle/docs_17_20/MovingAverage.py +134 -0
  299. teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
  300. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +209 -0
  301. teradataml/data/docs/sqle/docs_17_20/NPath.py +266 -0
  302. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
  303. teradataml/data/docs/sqle/docs_17_20/NaiveBayesPredict.py +116 -0
  304. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +177 -0
  305. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +127 -0
  306. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +119 -0
  307. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +112 -0
  308. teradataml/data/docs/sqle/docs_17_20/NumApply.py +147 -0
  309. teradataml/data/docs/sqle/docs_17_20/OneClassSVM.py +307 -0
  310. teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +185 -0
  311. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +231 -0
  312. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +121 -0
  313. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingFit.py +220 -0
  314. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingTransform.py +127 -0
  315. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +191 -0
  316. teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +117 -0
  317. teradataml/data/docs/sqle/docs_17_20/Pack.py +128 -0
  318. teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
  319. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesFit.py +112 -0
  320. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +112 -0
  321. teradataml/data/docs/sqle/docs_17_20/QQNorm.py +105 -0
  322. teradataml/data/docs/sqle/docs_17_20/ROC.py +164 -0
  323. teradataml/data/docs/sqle/docs_17_20/RandomProjectionFit.py +155 -0
  324. teradataml/data/docs/sqle/docs_17_20/RandomProjectionMinComponents.py +106 -0
  325. teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +120 -0
  326. teradataml/data/docs/sqle/docs_17_20/RegressionEvaluator.py +211 -0
  327. teradataml/data/docs/sqle/docs_17_20/RoundColumns.py +109 -0
  328. teradataml/data/docs/sqle/docs_17_20/RowNormalizeFit.py +118 -0
  329. teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +111 -0
  330. teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
  331. teradataml/data/docs/sqle/docs_17_20/SVM.py +414 -0
  332. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +213 -0
  333. teradataml/data/docs/sqle/docs_17_20/SVMSparsePredict.py +153 -0
  334. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +315 -0
  335. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +202 -0
  336. teradataml/data/docs/sqle/docs_17_20/SentimentExtractor.py +206 -0
  337. teradataml/data/docs/sqle/docs_17_20/Sessionize.py +114 -0
  338. teradataml/data/docs/sqle/docs_17_20/Shap.py +225 -0
  339. teradataml/data/docs/sqle/docs_17_20/Silhouette.py +153 -0
  340. teradataml/data/docs/sqle/docs_17_20/SimpleImputeFit.py +116 -0
  341. teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +109 -0
  342. teradataml/data/docs/sqle/docs_17_20/StrApply.py +187 -0
  343. teradataml/data/docs/sqle/docs_17_20/StringSimilarity.py +146 -0
  344. teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +207 -0
  345. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +333 -0
  346. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
  347. teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
  348. teradataml/data/docs/sqle/docs_17_20/TargetEncodingFit.py +267 -0
  349. teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +141 -0
  350. teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
  351. teradataml/data/docs/sqle/docs_17_20/TextParser.py +224 -0
  352. teradataml/data/docs/sqle/docs_17_20/TrainTestSplit.py +160 -0
  353. teradataml/data/docs/sqle/docs_17_20/Transform.py +123 -0
  354. teradataml/data/docs/sqle/docs_17_20/UnivariateStatistics.py +142 -0
  355. teradataml/data/docs/sqle/docs_17_20/Unpack.py +214 -0
  356. teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
  357. teradataml/data/docs/sqle/docs_17_20/VectorDistance.py +169 -0
  358. teradataml/data/docs/sqle/docs_17_20/WhichMax.py +83 -0
  359. teradataml/data/docs/sqle/docs_17_20/WhichMin.py +83 -0
  360. teradataml/data/docs/sqle/docs_17_20/WordEmbeddings.py +237 -0
  361. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +362 -0
  362. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +281 -0
  363. teradataml/data/docs/sqle/docs_17_20/ZTest.py +220 -0
  364. teradataml/data/docs/sqle/docs_17_20/__init__.py +0 -0
  365. teradataml/data/docs/tableoperator/__init__.py +0 -0
  366. teradataml/data/docs/tableoperator/docs_17_00/ReadNOS.py +430 -0
  367. teradataml/data/docs/tableoperator/docs_17_00/__init__.py +0 -0
  368. teradataml/data/docs/tableoperator/docs_17_05/ReadNOS.py +430 -0
  369. teradataml/data/docs/tableoperator/docs_17_05/WriteNOS.py +348 -0
  370. teradataml/data/docs/tableoperator/docs_17_05/__init__.py +0 -0
  371. teradataml/data/docs/tableoperator/docs_17_10/ReadNOS.py +429 -0
  372. teradataml/data/docs/tableoperator/docs_17_10/WriteNOS.py +348 -0
  373. teradataml/data/docs/tableoperator/docs_17_10/__init__.py +0 -0
  374. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  375. teradataml/data/docs/tableoperator/docs_17_20/ReadNOS.py +440 -0
  376. teradataml/data/docs/tableoperator/docs_17_20/WriteNOS.py +387 -0
  377. teradataml/data/docs/tableoperator/docs_17_20/__init__.py +0 -0
  378. teradataml/data/docs/uaf/__init__.py +0 -0
  379. teradataml/data/docs/uaf/docs_17_20/ACF.py +186 -0
  380. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +370 -0
  381. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +172 -0
  382. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +161 -0
  383. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
  384. teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
  385. teradataml/data/docs/uaf/docs_17_20/BinaryMatrixOp.py +248 -0
  386. teradataml/data/docs/uaf/docs_17_20/BinarySeriesOp.py +252 -0
  387. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +178 -0
  388. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +175 -0
  389. teradataml/data/docs/uaf/docs_17_20/Convolve.py +230 -0
  390. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +218 -0
  391. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  392. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +185 -0
  393. teradataml/data/docs/uaf/docs_17_20/DFFT.py +204 -0
  394. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +216 -0
  395. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +216 -0
  396. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +192 -0
  397. teradataml/data/docs/uaf/docs_17_20/DIFF.py +175 -0
  398. teradataml/data/docs/uaf/docs_17_20/DTW.py +180 -0
  399. teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
  400. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +217 -0
  401. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +142 -0
  402. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +184 -0
  403. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +185 -0
  404. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
  405. teradataml/data/docs/uaf/docs_17_20/FitMetrics.py +172 -0
  406. teradataml/data/docs/uaf/docs_17_20/GenseriesFormula.py +206 -0
  407. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +143 -0
  408. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +198 -0
  409. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +260 -0
  410. teradataml/data/docs/uaf/docs_17_20/IDFFT.py +165 -0
  411. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +191 -0
  412. teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
  413. teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
  414. teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
  415. teradataml/data/docs/uaf/docs_17_20/InputValidator.py +121 -0
  416. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +156 -0
  417. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +215 -0
  418. teradataml/data/docs/uaf/docs_17_20/MAMean.py +174 -0
  419. teradataml/data/docs/uaf/docs_17_20/MInfo.py +134 -0
  420. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
  421. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +145 -0
  422. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +191 -0
  423. teradataml/data/docs/uaf/docs_17_20/PACF.py +157 -0
  424. teradataml/data/docs/uaf/docs_17_20/Portman.py +217 -0
  425. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +203 -0
  426. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +155 -0
  427. teradataml/data/docs/uaf/docs_17_20/Resample.py +237 -0
  428. teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
  429. teradataml/data/docs/uaf/docs_17_20/SInfo.py +123 -0
  430. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +173 -0
  431. teradataml/data/docs/uaf/docs_17_20/SelectionCriteria.py +174 -0
  432. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +171 -0
  433. teradataml/data/docs/uaf/docs_17_20/SignifResidmean.py +164 -0
  434. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +180 -0
  435. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +208 -0
  436. teradataml/data/docs/uaf/docs_17_20/TrackingOp.py +151 -0
  437. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +171 -0
  438. teradataml/data/docs/uaf/docs_17_20/Unnormalize.py +202 -0
  439. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +171 -0
  440. teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
  441. teradataml/data/docs/uaf/docs_17_20/__init__.py +0 -0
  442. teradataml/data/dtw_example.json +18 -0
  443. teradataml/data/dtw_t1.csv +11 -0
  444. teradataml/data/dtw_t2.csv +4 -0
  445. teradataml/data/dwt2d_dataTable.csv +65 -0
  446. teradataml/data/dwt2d_example.json +16 -0
  447. teradataml/data/dwt_dataTable.csv +8 -0
  448. teradataml/data/dwt_example.json +15 -0
  449. teradataml/data/dwt_filterTable.csv +3 -0
  450. teradataml/data/dwt_filter_dim.csv +5 -0
  451. teradataml/data/emission.csv +9 -0
  452. teradataml/data/emp_table_by_dept.csv +19 -0
  453. teradataml/data/employee_info.csv +4 -0
  454. teradataml/data/employee_table.csv +6 -0
  455. teradataml/data/excluding_event_table.csv +2 -0
  456. teradataml/data/finance_data.csv +6 -0
  457. teradataml/data/finance_data2.csv +61 -0
  458. teradataml/data/finance_data3.csv +93 -0
  459. teradataml/data/finance_data4.csv +13 -0
  460. teradataml/data/fish.csv +160 -0
  461. teradataml/data/fm_blood2ageandweight.csv +26 -0
  462. teradataml/data/fmeasure_example.json +12 -0
  463. teradataml/data/followers_leaders.csv +10 -0
  464. teradataml/data/fpgrowth_example.json +12 -0
  465. teradataml/data/frequentpaths_example.json +29 -0
  466. teradataml/data/friends.csv +9 -0
  467. teradataml/data/fs_input.csv +33 -0
  468. teradataml/data/fs_input1.csv +33 -0
  469. teradataml/data/genData.csv +513 -0
  470. teradataml/data/geodataframe_example.json +40 -0
  471. teradataml/data/glass_types.csv +215 -0
  472. teradataml/data/glm_admissions_model.csv +12 -0
  473. teradataml/data/glm_example.json +56 -0
  474. teradataml/data/glml1l2_example.json +28 -0
  475. teradataml/data/glml1l2predict_example.json +54 -0
  476. teradataml/data/glmpredict_example.json +54 -0
  477. teradataml/data/gq_t1.csv +21 -0
  478. teradataml/data/grocery_transaction.csv +19 -0
  479. teradataml/data/hconvolve_complex_right.csv +5 -0
  480. teradataml/data/hconvolve_complex_rightmulti.csv +5 -0
  481. teradataml/data/histogram_example.json +12 -0
  482. teradataml/data/hmmdecoder_example.json +79 -0
  483. teradataml/data/hmmevaluator_example.json +25 -0
  484. teradataml/data/hmmsupervised_example.json +10 -0
  485. teradataml/data/hmmunsupervised_example.json +8 -0
  486. teradataml/data/hnsw_alter_data.csv +5 -0
  487. teradataml/data/hnsw_data.csv +10 -0
  488. teradataml/data/house_values.csv +12 -0
  489. teradataml/data/house_values2.csv +13 -0
  490. teradataml/data/housing_cat.csv +7 -0
  491. teradataml/data/housing_data.csv +9 -0
  492. teradataml/data/housing_test.csv +47 -0
  493. teradataml/data/housing_test_binary.csv +47 -0
  494. teradataml/data/housing_train.csv +493 -0
  495. teradataml/data/housing_train_attribute.csv +5 -0
  496. teradataml/data/housing_train_binary.csv +437 -0
  497. teradataml/data/housing_train_parameter.csv +2 -0
  498. teradataml/data/housing_train_response.csv +493 -0
  499. teradataml/data/housing_train_segment.csv +201 -0
  500. teradataml/data/ibm_stock.csv +370 -0
  501. teradataml/data/ibm_stock1.csv +370 -0
  502. teradataml/data/identitymatch_example.json +22 -0
  503. teradataml/data/idf_table.csv +4 -0
  504. teradataml/data/idwt2d_dataTable.csv +5 -0
  505. teradataml/data/idwt_dataTable.csv +8 -0
  506. teradataml/data/idwt_filterTable.csv +3 -0
  507. teradataml/data/impressions.csv +101 -0
  508. teradataml/data/inflation.csv +21 -0
  509. teradataml/data/initial.csv +3 -0
  510. teradataml/data/insect2Cols.csv +61 -0
  511. teradataml/data/insect_sprays.csv +13 -0
  512. teradataml/data/insurance.csv +1339 -0
  513. teradataml/data/interpolator_example.json +13 -0
  514. teradataml/data/interval_data.csv +5 -0
  515. teradataml/data/iris_altinput.csv +481 -0
  516. teradataml/data/iris_attribute_output.csv +8 -0
  517. teradataml/data/iris_attribute_test.csv +121 -0
  518. teradataml/data/iris_attribute_train.csv +481 -0
  519. teradataml/data/iris_category_expect_predict.csv +31 -0
  520. teradataml/data/iris_data.csv +151 -0
  521. teradataml/data/iris_input.csv +151 -0
  522. teradataml/data/iris_response_train.csv +121 -0
  523. teradataml/data/iris_test.csv +31 -0
  524. teradataml/data/iris_train.csv +121 -0
  525. teradataml/data/join_table1.csv +4 -0
  526. teradataml/data/join_table2.csv +4 -0
  527. teradataml/data/jsons/anly_function_name.json +7 -0
  528. teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
  529. teradataml/data/jsons/byom/dataikupredict.json +148 -0
  530. teradataml/data/jsons/byom/datarobotpredict.json +147 -0
  531. teradataml/data/jsons/byom/h2opredict.json +195 -0
  532. teradataml/data/jsons/byom/onnxembeddings.json +267 -0
  533. teradataml/data/jsons/byom/onnxpredict.json +187 -0
  534. teradataml/data/jsons/byom/pmmlpredict.json +147 -0
  535. teradataml/data/jsons/paired_functions.json +450 -0
  536. teradataml/data/jsons/sqle/16.20/Antiselect.json +56 -0
  537. teradataml/data/jsons/sqle/16.20/Attribution.json +249 -0
  538. teradataml/data/jsons/sqle/16.20/DecisionForestPredict.json +156 -0
  539. teradataml/data/jsons/sqle/16.20/DecisionTreePredict.json +170 -0
  540. teradataml/data/jsons/sqle/16.20/GLMPredict.json +122 -0
  541. teradataml/data/jsons/sqle/16.20/MovingAverage.json +367 -0
  542. teradataml/data/jsons/sqle/16.20/NGramSplitter.json +239 -0
  543. teradataml/data/jsons/sqle/16.20/NaiveBayesPredict.json +136 -0
  544. teradataml/data/jsons/sqle/16.20/NaiveBayesTextClassifierPredict.json +235 -0
  545. teradataml/data/jsons/sqle/16.20/Pack.json +98 -0
  546. teradataml/data/jsons/sqle/16.20/SVMSparsePredict.json +162 -0
  547. teradataml/data/jsons/sqle/16.20/Sessionize.json +105 -0
  548. teradataml/data/jsons/sqle/16.20/StringSimilarity.json +86 -0
  549. teradataml/data/jsons/sqle/16.20/Unpack.json +166 -0
  550. teradataml/data/jsons/sqle/16.20/nPath.json +269 -0
  551. teradataml/data/jsons/sqle/17.00/Antiselect.json +56 -0
  552. teradataml/data/jsons/sqle/17.00/Attribution.json +249 -0
  553. teradataml/data/jsons/sqle/17.00/DecisionForestPredict.json +156 -0
  554. teradataml/data/jsons/sqle/17.00/DecisionTreePredict.json +170 -0
  555. teradataml/data/jsons/sqle/17.00/GLMPredict.json +122 -0
  556. teradataml/data/jsons/sqle/17.00/MovingAverage.json +367 -0
  557. teradataml/data/jsons/sqle/17.00/NGramSplitter.json +239 -0
  558. teradataml/data/jsons/sqle/17.00/NaiveBayesPredict.json +136 -0
  559. teradataml/data/jsons/sqle/17.00/NaiveBayesTextClassifierPredict.json +235 -0
  560. teradataml/data/jsons/sqle/17.00/Pack.json +98 -0
  561. teradataml/data/jsons/sqle/17.00/SVMSparsePredict.json +162 -0
  562. teradataml/data/jsons/sqle/17.00/Sessionize.json +105 -0
  563. teradataml/data/jsons/sqle/17.00/StringSimilarity.json +86 -0
  564. teradataml/data/jsons/sqle/17.00/Unpack.json +166 -0
  565. teradataml/data/jsons/sqle/17.00/nPath.json +269 -0
  566. teradataml/data/jsons/sqle/17.05/Antiselect.json +56 -0
  567. teradataml/data/jsons/sqle/17.05/Attribution.json +249 -0
  568. teradataml/data/jsons/sqle/17.05/DecisionForestPredict.json +156 -0
  569. teradataml/data/jsons/sqle/17.05/DecisionTreePredict.json +170 -0
  570. teradataml/data/jsons/sqle/17.05/GLMPredict.json +122 -0
  571. teradataml/data/jsons/sqle/17.05/MovingAverage.json +367 -0
  572. teradataml/data/jsons/sqle/17.05/NGramSplitter.json +239 -0
  573. teradataml/data/jsons/sqle/17.05/NaiveBayesPredict.json +136 -0
  574. teradataml/data/jsons/sqle/17.05/NaiveBayesTextClassifierPredict.json +235 -0
  575. teradataml/data/jsons/sqle/17.05/Pack.json +98 -0
  576. teradataml/data/jsons/sqle/17.05/SVMSparsePredict.json +162 -0
  577. teradataml/data/jsons/sqle/17.05/Sessionize.json +105 -0
  578. teradataml/data/jsons/sqle/17.05/StringSimilarity.json +86 -0
  579. teradataml/data/jsons/sqle/17.05/Unpack.json +166 -0
  580. teradataml/data/jsons/sqle/17.05/nPath.json +269 -0
  581. teradataml/data/jsons/sqle/17.10/Antiselect.json +56 -0
  582. teradataml/data/jsons/sqle/17.10/Attribution.json +249 -0
  583. teradataml/data/jsons/sqle/17.10/DecisionForestPredict.json +185 -0
  584. teradataml/data/jsons/sqle/17.10/DecisionTreePredict.json +172 -0
  585. teradataml/data/jsons/sqle/17.10/GLMPredict.json +151 -0
  586. teradataml/data/jsons/sqle/17.10/MovingAverage.json +368 -0
  587. teradataml/data/jsons/sqle/17.10/NGramSplitter.json +239 -0
  588. teradataml/data/jsons/sqle/17.10/NaiveBayesPredict.json +149 -0
  589. teradataml/data/jsons/sqle/17.10/NaiveBayesTextClassifierPredict.json +288 -0
  590. teradataml/data/jsons/sqle/17.10/Pack.json +133 -0
  591. teradataml/data/jsons/sqle/17.10/SVMSparsePredict.json +193 -0
  592. teradataml/data/jsons/sqle/17.10/Sessionize.json +105 -0
  593. teradataml/data/jsons/sqle/17.10/StringSimilarity.json +86 -0
  594. teradataml/data/jsons/sqle/17.10/TD_BinCodeFit.json +239 -0
  595. teradataml/data/jsons/sqle/17.10/TD_BinCodeTransform.json +70 -0
  596. teradataml/data/jsons/sqle/17.10/TD_CategoricalSummary.json +54 -0
  597. teradataml/data/jsons/sqle/17.10/TD_Chisq.json +68 -0
  598. teradataml/data/jsons/sqle/17.10/TD_ColumnSummary.json +54 -0
  599. teradataml/data/jsons/sqle/17.10/TD_ConvertTo.json +69 -0
  600. teradataml/data/jsons/sqle/17.10/TD_FTest.json +187 -0
  601. teradataml/data/jsons/sqle/17.10/TD_FillRowID.json +52 -0
  602. teradataml/data/jsons/sqle/17.10/TD_FunctionFit.json +46 -0
  603. teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +72 -0
  604. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithMissingValues.json +53 -0
  605. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithoutMissingValues.json +53 -0
  606. teradataml/data/jsons/sqle/17.10/TD_Histogram.json +133 -0
  607. teradataml/data/jsons/sqle/17.10/TD_NumApply.json +147 -0
  608. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingFit.json +183 -0
  609. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +66 -0
  610. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterFit.json +197 -0
  611. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +48 -0
  612. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesFit.json +114 -0
  613. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +72 -0
  614. teradataml/data/jsons/sqle/17.10/TD_QQNorm.json +112 -0
  615. teradataml/data/jsons/sqle/17.10/TD_RoundColumns.json +93 -0
  616. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeFit.json +128 -0
  617. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +71 -0
  618. teradataml/data/jsons/sqle/17.10/TD_ScaleFit.json +157 -0
  619. teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +71 -0
  620. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeFit.json +148 -0
  621. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +48 -0
  622. teradataml/data/jsons/sqle/17.10/TD_StrApply.json +240 -0
  623. teradataml/data/jsons/sqle/17.10/TD_UnivariateStatistics.json +119 -0
  624. teradataml/data/jsons/sqle/17.10/TD_WhichMax.json +53 -0
  625. teradataml/data/jsons/sqle/17.10/TD_WhichMin.json +53 -0
  626. teradataml/data/jsons/sqle/17.10/TD_ZTest.json +171 -0
  627. teradataml/data/jsons/sqle/17.10/Unpack.json +188 -0
  628. teradataml/data/jsons/sqle/17.10/nPath.json +269 -0
  629. teradataml/data/jsons/sqle/17.20/Antiselect.json +56 -0
  630. teradataml/data/jsons/sqle/17.20/Attribution.json +249 -0
  631. teradataml/data/jsons/sqle/17.20/DecisionForestPredict.json +185 -0
  632. teradataml/data/jsons/sqle/17.20/DecisionTreePredict.json +172 -0
  633. teradataml/data/jsons/sqle/17.20/GLMPredict.json +151 -0
  634. teradataml/data/jsons/sqle/17.20/MovingAverage.json +367 -0
  635. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +239 -0
  636. teradataml/data/jsons/sqle/17.20/NaiveBayesPredict.json +149 -0
  637. teradataml/data/jsons/sqle/17.20/NaiveBayesTextClassifierPredict.json +287 -0
  638. teradataml/data/jsons/sqle/17.20/Pack.json +133 -0
  639. teradataml/data/jsons/sqle/17.20/SVMSparsePredict.json +192 -0
  640. teradataml/data/jsons/sqle/17.20/Sessionize.json +105 -0
  641. teradataml/data/jsons/sqle/17.20/StringSimilarity.json +86 -0
  642. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +149 -0
  643. teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
  644. teradataml/data/jsons/sqle/17.20/TD_BinCodeFit.json +239 -0
  645. teradataml/data/jsons/sqle/17.20/TD_BinCodeTransform.json +71 -0
  646. teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
  647. teradataml/data/jsons/sqle/17.20/TD_CategoricalSummary.json +53 -0
  648. teradataml/data/jsons/sqle/17.20/TD_Chisq.json +68 -0
  649. teradataml/data/jsons/sqle/17.20/TD_ClassificationEvaluator.json +146 -0
  650. teradataml/data/jsons/sqle/17.20/TD_ColumnSummary.json +53 -0
  651. teradataml/data/jsons/sqle/17.20/TD_ColumnTransformer.json +218 -0
  652. teradataml/data/jsons/sqle/17.20/TD_ConvertTo.json +92 -0
  653. teradataml/data/jsons/sqle/17.20/TD_DecisionForest.json +260 -0
  654. teradataml/data/jsons/sqle/17.20/TD_DecisionForestPredict.json +139 -0
  655. teradataml/data/jsons/sqle/17.20/TD_FTest.json +269 -0
  656. teradataml/data/jsons/sqle/17.20/TD_FillRowID.json +52 -0
  657. teradataml/data/jsons/sqle/17.20/TD_FunctionFit.json +46 -0
  658. teradataml/data/jsons/sqle/17.20/TD_FunctionTransform.json +72 -0
  659. teradataml/data/jsons/sqle/17.20/TD_GLM.json +507 -0
  660. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +168 -0
  661. teradataml/data/jsons/sqle/17.20/TD_GLMPerSegment.json +411 -0
  662. teradataml/data/jsons/sqle/17.20/TD_GLMPredictPerSegment.json +146 -0
  663. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +93 -0
  664. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithMissingValues.json +76 -0
  665. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithoutMissingValues.json +76 -0
  666. teradataml/data/jsons/sqle/17.20/TD_Histogram.json +152 -0
  667. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +232 -0
  668. teradataml/data/jsons/sqle/17.20/TD_KMeansPredict.json +87 -0
  669. teradataml/data/jsons/sqle/17.20/TD_KNN.json +262 -0
  670. teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
  671. teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
  672. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
  673. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesTextClassifierTrainer.json +137 -0
  674. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +102 -0
  675. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineTransform.json +71 -0
  676. teradataml/data/jsons/sqle/17.20/TD_NumApply.json +147 -0
  677. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +316 -0
  678. teradataml/data/jsons/sqle/17.20/TD_OneClassSVMPredict.json +124 -0
  679. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingFit.json +271 -0
  680. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingTransform.json +65 -0
  681. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingFit.json +229 -0
  682. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingTransform.json +75 -0
  683. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterFit.json +217 -0
  684. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterTransform.json +48 -0
  685. teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
  686. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesFit.json +114 -0
  687. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesTransform.json +72 -0
  688. teradataml/data/jsons/sqle/17.20/TD_QQNorm.json +111 -0
  689. teradataml/data/jsons/sqle/17.20/TD_ROC.json +179 -0
  690. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionFit.json +179 -0
  691. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionMinComponents.json +74 -0
  692. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionTransform.json +74 -0
  693. teradataml/data/jsons/sqle/17.20/TD_RegressionEvaluator.json +138 -0
  694. teradataml/data/jsons/sqle/17.20/TD_RoundColumns.json +93 -0
  695. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeFit.json +128 -0
  696. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeTransform.json +71 -0
  697. teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
  698. teradataml/data/jsons/sqle/17.20/TD_SVM.json +389 -0
  699. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +142 -0
  700. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +310 -0
  701. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +120 -0
  702. teradataml/data/jsons/sqle/17.20/TD_SentimentExtractor.json +194 -0
  703. teradataml/data/jsons/sqle/17.20/TD_Shap.json +221 -0
  704. teradataml/data/jsons/sqle/17.20/TD_Silhouette.json +143 -0
  705. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeFit.json +147 -0
  706. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeTransform.json +48 -0
  707. teradataml/data/jsons/sqle/17.20/TD_StrApply.json +240 -0
  708. teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
  709. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingFit.json +248 -0
  710. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingTransform.json +75 -0
  711. teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
  712. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +297 -0
  713. teradataml/data/jsons/sqle/17.20/TD_TrainTestSplit.json +142 -0
  714. teradataml/data/jsons/sqle/17.20/TD_UnivariateStatistics.json +117 -0
  715. teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
  716. teradataml/data/jsons/sqle/17.20/TD_VectorDistance.json +183 -0
  717. teradataml/data/jsons/sqle/17.20/TD_WhichMax.json +53 -0
  718. teradataml/data/jsons/sqle/17.20/TD_WhichMin.json +53 -0
  719. teradataml/data/jsons/sqle/17.20/TD_WordEmbeddings.json +241 -0
  720. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +330 -0
  721. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +195 -0
  722. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +247 -0
  723. teradataml/data/jsons/sqle/17.20/Unpack.json +188 -0
  724. teradataml/data/jsons/sqle/17.20/nPath.json +269 -0
  725. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +370 -0
  726. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +460 -0
  727. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +385 -0
  728. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +369 -0
  729. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +369 -0
  730. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +369 -0
  731. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +369 -0
  732. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +400 -0
  733. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +401 -0
  734. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +384 -0
  735. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +384 -0
  736. teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
  737. teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
  738. teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
  739. teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
  740. teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
  741. teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
  742. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  743. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  744. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  745. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  746. teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
  747. teradataml/data/jsons/tableoperator/17.00/read_nos.json +198 -0
  748. teradataml/data/jsons/tableoperator/17.05/read_nos.json +198 -0
  749. teradataml/data/jsons/tableoperator/17.05/write_nos.json +195 -0
  750. teradataml/data/jsons/tableoperator/17.10/read_nos.json +184 -0
  751. teradataml/data/jsons/tableoperator/17.10/write_nos.json +195 -0
  752. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  753. teradataml/data/jsons/tableoperator/17.20/read_nos.json +183 -0
  754. teradataml/data/jsons/tableoperator/17.20/write_nos.json +224 -0
  755. teradataml/data/jsons/uaf/17.20/TD_ACF.json +132 -0
  756. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +396 -0
  757. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +77 -0
  758. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +153 -0
  759. teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
  760. teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
  761. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +107 -0
  762. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +106 -0
  763. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +89 -0
  764. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +104 -0
  765. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +78 -0
  766. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +66 -0
  767. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +87 -0
  768. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +134 -0
  769. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +144 -0
  770. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +108 -0
  771. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +108 -0
  772. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +78 -0
  773. teradataml/data/jsons/uaf/17.20/TD_DIFF.json +92 -0
  774. teradataml/data/jsons/uaf/17.20/TD_DTW.json +114 -0
  775. teradataml/data/jsons/uaf/17.20/TD_DURBIN_WATSON.json +101 -0
  776. teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
  777. teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
  778. teradataml/data/jsons/uaf/17.20/TD_EXTRACT_RESULTS.json +39 -0
  779. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +101 -0
  780. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4FORMULA.json +85 -0
  781. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4SINUSOIDS.json +71 -0
  782. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +139 -0
  783. teradataml/data/jsons/uaf/17.20/TD_HOLT_WINTERS_FORECASTER.json +313 -0
  784. teradataml/data/jsons/uaf/17.20/TD_IDFFT.json +58 -0
  785. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +81 -0
  786. teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
  787. teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
  788. teradataml/data/jsons/uaf/17.20/TD_INPUTVALIDATOR.json +64 -0
  789. teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
  790. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +182 -0
  791. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +103 -0
  792. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +181 -0
  793. teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
  794. teradataml/data/jsons/uaf/17.20/TD_MATRIXMULTIPLY.json +68 -0
  795. teradataml/data/jsons/uaf/17.20/TD_MINFO.json +67 -0
  796. teradataml/data/jsons/uaf/17.20/TD_MULTIVAR_REGR.json +179 -0
  797. teradataml/data/jsons/uaf/17.20/TD_PACF.json +114 -0
  798. teradataml/data/jsons/uaf/17.20/TD_PORTMAN.json +119 -0
  799. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +175 -0
  800. teradataml/data/jsons/uaf/17.20/TD_POWERTRANSFORM.json +98 -0
  801. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +194 -0
  802. teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
  803. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +143 -0
  804. teradataml/data/jsons/uaf/17.20/TD_SELECTION_CRITERIA.json +90 -0
  805. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_PERIODICITIES.json +80 -0
  806. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_RESIDMEAN.json +68 -0
  807. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +184 -0
  808. teradataml/data/jsons/uaf/17.20/TD_SINFO.json +58 -0
  809. teradataml/data/jsons/uaf/17.20/TD_SMOOTHMA.json +163 -0
  810. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +101 -0
  811. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +112 -0
  812. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +95 -0
  813. teradataml/data/jsons/uaf/17.20/TD_WHITES_GENERAL.json +78 -0
  814. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
  815. teradataml/data/kmeans_example.json +23 -0
  816. teradataml/data/kmeans_table.csv +10 -0
  817. teradataml/data/kmeans_us_arrests_data.csv +51 -0
  818. teradataml/data/knn_example.json +19 -0
  819. teradataml/data/knnrecommender_example.json +7 -0
  820. teradataml/data/knnrecommenderpredict_example.json +12 -0
  821. teradataml/data/lar_example.json +17 -0
  822. teradataml/data/larpredict_example.json +30 -0
  823. teradataml/data/lc_new_predictors.csv +5 -0
  824. teradataml/data/lc_new_reference.csv +9 -0
  825. teradataml/data/lda_example.json +9 -0
  826. teradataml/data/ldainference_example.json +15 -0
  827. teradataml/data/ldatopicsummary_example.json +9 -0
  828. teradataml/data/levendist_input.csv +13 -0
  829. teradataml/data/levenshteindistance_example.json +10 -0
  830. teradataml/data/linreg_example.json +10 -0
  831. teradataml/data/load_example_data.py +350 -0
  832. teradataml/data/loan_prediction.csv +295 -0
  833. teradataml/data/lungcancer.csv +138 -0
  834. teradataml/data/mappingdata.csv +12 -0
  835. teradataml/data/medical_readings.csv +101 -0
  836. teradataml/data/milk_timeseries.csv +157 -0
  837. teradataml/data/min_max_titanic.csv +4 -0
  838. teradataml/data/minhash_example.json +6 -0
  839. teradataml/data/ml_ratings.csv +7547 -0
  840. teradataml/data/ml_ratings_10.csv +2445 -0
  841. teradataml/data/mobile_data.csv +13 -0
  842. teradataml/data/model1_table.csv +5 -0
  843. teradataml/data/model2_table.csv +5 -0
  844. teradataml/data/models/License_file.txt +1 -0
  845. teradataml/data/models/License_file_empty.txt +0 -0
  846. teradataml/data/models/dataiku_iris_data_ann_thin +0 -0
  847. teradataml/data/models/dr_iris_rf +0 -0
  848. teradataml/data/models/iris_db_dt_model_sklearn.onnx +0 -0
  849. teradataml/data/models/iris_db_dt_model_sklearn_floattensor.onnx +0 -0
  850. teradataml/data/models/iris_db_glm_model.pmml +57 -0
  851. teradataml/data/models/iris_db_xgb_model.pmml +4471 -0
  852. teradataml/data/models/iris_kmeans_model +0 -0
  853. teradataml/data/models/iris_mojo_glm_h2o_model +0 -0
  854. teradataml/data/models/iris_mojo_xgb_h2o_model +0 -0
  855. teradataml/data/modularity_example.json +12 -0
  856. teradataml/data/movavg_example.json +8 -0
  857. teradataml/data/mtx1.csv +7 -0
  858. teradataml/data/mtx2.csv +13 -0
  859. teradataml/data/multi_model_classification.csv +401 -0
  860. teradataml/data/multi_model_regression.csv +401 -0
  861. teradataml/data/mvdfft8.csv +9 -0
  862. teradataml/data/naivebayes_example.json +10 -0
  863. teradataml/data/naivebayespredict_example.json +19 -0
  864. teradataml/data/naivebayestextclassifier2_example.json +7 -0
  865. teradataml/data/naivebayestextclassifier_example.json +8 -0
  866. teradataml/data/naivebayestextclassifierpredict_example.json +32 -0
  867. teradataml/data/name_Find_configure.csv +10 -0
  868. teradataml/data/namedentityfinder_example.json +14 -0
  869. teradataml/data/namedentityfinderevaluator_example.json +10 -0
  870. teradataml/data/namedentityfindertrainer_example.json +6 -0
  871. teradataml/data/nb_iris_input_test.csv +31 -0
  872. teradataml/data/nb_iris_input_train.csv +121 -0
  873. teradataml/data/nbp_iris_model.csv +13 -0
  874. teradataml/data/ner_dict.csv +8 -0
  875. teradataml/data/ner_extractor_text.csv +2 -0
  876. teradataml/data/ner_input_eng.csv +7 -0
  877. teradataml/data/ner_rule.csv +5 -0
  878. teradataml/data/ner_sports_test2.csv +29 -0
  879. teradataml/data/ner_sports_train.csv +501 -0
  880. teradataml/data/nerevaluator_example.json +6 -0
  881. teradataml/data/nerextractor_example.json +18 -0
  882. teradataml/data/nermem_sports_test.csv +18 -0
  883. teradataml/data/nermem_sports_train.csv +51 -0
  884. teradataml/data/nertrainer_example.json +7 -0
  885. teradataml/data/ngrams_example.json +7 -0
  886. teradataml/data/notebooks/__init__.py +0 -0
  887. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Aggregate Functions using SQLAlchemy.ipynb +1455 -0
  888. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Arithmetic Functions Using SQLAlchemy.ipynb +1993 -0
  889. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Bit-Byte Manipulation Functions using SQLAlchemy.ipynb +1492 -0
  890. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Built-in functions using SQLAlchemy.ipynb +536 -0
  891. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Regular Expressions Using SQLAlchemy.ipynb +570 -0
  892. teradataml/data/notebooks/sqlalchemy/Teradata Vantage String Functions Using SQLAlchemy.ipynb +2559 -0
  893. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Window Aggregate Functions using SQLAlchemy.ipynb +2911 -0
  894. teradataml/data/notebooks/sqlalchemy/Using Generic SQLAlchemy ClauseElements teradataml DataFrame assign method.ipynb +698 -0
  895. teradataml/data/notebooks/sqlalchemy/__init__.py +0 -0
  896. teradataml/data/notebooks/sqlalchemy/teradataml filtering using SQLAlchemy ClauseElements.ipynb +784 -0
  897. teradataml/data/npath_example.json +23 -0
  898. teradataml/data/ntree_example.json +14 -0
  899. teradataml/data/numeric_strings.csv +5 -0
  900. teradataml/data/numerics.csv +4 -0
  901. teradataml/data/ocean_buoy.csv +17 -0
  902. teradataml/data/ocean_buoy2.csv +17 -0
  903. teradataml/data/ocean_buoys.csv +28 -0
  904. teradataml/data/ocean_buoys2.csv +10 -0
  905. teradataml/data/ocean_buoys_nonpti.csv +28 -0
  906. teradataml/data/ocean_buoys_seq.csv +29 -0
  907. teradataml/data/onehot_encoder_train.csv +4 -0
  908. teradataml/data/openml_example.json +92 -0
  909. teradataml/data/optional_event_table.csv +4 -0
  910. teradataml/data/orders1.csv +11 -0
  911. teradataml/data/orders1_12.csv +13 -0
  912. teradataml/data/orders_ex.csv +4 -0
  913. teradataml/data/pack_example.json +9 -0
  914. teradataml/data/package_tracking.csv +19 -0
  915. teradataml/data/package_tracking_pti.csv +19 -0
  916. teradataml/data/pagerank_example.json +13 -0
  917. teradataml/data/paragraphs_input.csv +6 -0
  918. teradataml/data/pathanalyzer_example.json +8 -0
  919. teradataml/data/pathgenerator_example.json +8 -0
  920. teradataml/data/patient_profile.csv +101 -0
  921. teradataml/data/pattern_matching_data.csv +11 -0
  922. teradataml/data/payment_fraud_dataset.csv +10001 -0
  923. teradataml/data/peppers.png +0 -0
  924. teradataml/data/phrases.csv +7 -0
  925. teradataml/data/pivot_example.json +9 -0
  926. teradataml/data/pivot_input.csv +22 -0
  927. teradataml/data/playerRating.csv +31 -0
  928. teradataml/data/pos_input.csv +40 -0
  929. teradataml/data/postagger_example.json +7 -0
  930. teradataml/data/posttagger_output.csv +44 -0
  931. teradataml/data/production_data.csv +17 -0
  932. teradataml/data/production_data2.csv +7 -0
  933. teradataml/data/randomsample_example.json +32 -0
  934. teradataml/data/randomwalksample_example.json +9 -0
  935. teradataml/data/rank_table.csv +6 -0
  936. teradataml/data/real_values.csv +14 -0
  937. teradataml/data/ref_mobile_data.csv +4 -0
  938. teradataml/data/ref_mobile_data_dense.csv +2 -0
  939. teradataml/data/ref_url.csv +17 -0
  940. teradataml/data/restaurant_reviews.csv +7 -0
  941. teradataml/data/retail_churn_table.csv +27772 -0
  942. teradataml/data/river_data.csv +145 -0
  943. teradataml/data/roc_example.json +8 -0
  944. teradataml/data/roc_input.csv +101 -0
  945. teradataml/data/rule_inputs.csv +6 -0
  946. teradataml/data/rule_table.csv +2 -0
  947. teradataml/data/sales.csv +7 -0
  948. teradataml/data/sales_transaction.csv +501 -0
  949. teradataml/data/salesdata.csv +342 -0
  950. teradataml/data/sample_cities.csv +3 -0
  951. teradataml/data/sample_shapes.csv +11 -0
  952. teradataml/data/sample_streets.csv +3 -0
  953. teradataml/data/sampling_example.json +16 -0
  954. teradataml/data/sax_example.json +17 -0
  955. teradataml/data/scale_attributes.csv +3 -0
  956. teradataml/data/scale_example.json +74 -0
  957. teradataml/data/scale_housing.csv +11 -0
  958. teradataml/data/scale_housing_test.csv +6 -0
  959. teradataml/data/scale_input_part_sparse.csv +31 -0
  960. teradataml/data/scale_input_partitioned.csv +16 -0
  961. teradataml/data/scale_input_sparse.csv +11 -0
  962. teradataml/data/scale_parameters.csv +3 -0
  963. teradataml/data/scale_stat.csv +11 -0
  964. teradataml/data/scalebypartition_example.json +13 -0
  965. teradataml/data/scalemap_example.json +13 -0
  966. teradataml/data/scalesummary_example.json +12 -0
  967. teradataml/data/score_category.csv +101 -0
  968. teradataml/data/score_summary.csv +4 -0
  969. teradataml/data/script_example.json +10 -0
  970. teradataml/data/scripts/deploy_script.py +84 -0
  971. teradataml/data/scripts/lightgbm/dataset.template +175 -0
  972. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +264 -0
  973. teradataml/data/scripts/lightgbm/lightgbm_function.template +234 -0
  974. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +177 -0
  975. teradataml/data/scripts/mapper.R +20 -0
  976. teradataml/data/scripts/mapper.py +16 -0
  977. teradataml/data/scripts/mapper_replace.py +16 -0
  978. teradataml/data/scripts/sklearn/__init__.py +0 -0
  979. teradataml/data/scripts/sklearn/sklearn_fit.py +205 -0
  980. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +148 -0
  981. teradataml/data/scripts/sklearn/sklearn_function.template +144 -0
  982. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +166 -0
  983. teradataml/data/scripts/sklearn/sklearn_neighbors.py +161 -0
  984. teradataml/data/scripts/sklearn/sklearn_score.py +145 -0
  985. teradataml/data/scripts/sklearn/sklearn_transform.py +327 -0
  986. teradataml/data/sdk/modelops/modelops_spec.json +101737 -0
  987. teradataml/data/seeds.csv +10 -0
  988. teradataml/data/sentenceextractor_example.json +7 -0
  989. teradataml/data/sentiment_extract_input.csv +11 -0
  990. teradataml/data/sentiment_train.csv +16 -0
  991. teradataml/data/sentiment_word.csv +20 -0
  992. teradataml/data/sentiment_word_input.csv +20 -0
  993. teradataml/data/sentimentextractor_example.json +24 -0
  994. teradataml/data/sentimenttrainer_example.json +8 -0
  995. teradataml/data/sequence_table.csv +10 -0
  996. teradataml/data/seriessplitter_example.json +8 -0
  997. teradataml/data/sessionize_example.json +17 -0
  998. teradataml/data/sessionize_table.csv +116 -0
  999. teradataml/data/setop_test1.csv +24 -0
  1000. teradataml/data/setop_test2.csv +22 -0
  1001. teradataml/data/soc_nw_edges.csv +11 -0
  1002. teradataml/data/soc_nw_vertices.csv +8 -0
  1003. teradataml/data/souvenir_timeseries.csv +168 -0
  1004. teradataml/data/sparse_iris_attribute.csv +5 -0
  1005. teradataml/data/sparse_iris_test.csv +121 -0
  1006. teradataml/data/sparse_iris_train.csv +601 -0
  1007. teradataml/data/star1.csv +6 -0
  1008. teradataml/data/star_pivot.csv +8 -0
  1009. teradataml/data/state_transition.csv +5 -0
  1010. teradataml/data/stock_data.csv +53 -0
  1011. teradataml/data/stock_movement.csv +11 -0
  1012. teradataml/data/stock_vol.csv +76 -0
  1013. teradataml/data/stop_words.csv +8 -0
  1014. teradataml/data/store_sales.csv +37 -0
  1015. teradataml/data/stringsimilarity_example.json +8 -0
  1016. teradataml/data/strsimilarity_input.csv +13 -0
  1017. teradataml/data/students.csv +101 -0
  1018. teradataml/data/svm_iris_input_test.csv +121 -0
  1019. teradataml/data/svm_iris_input_train.csv +481 -0
  1020. teradataml/data/svm_iris_model.csv +7 -0
  1021. teradataml/data/svmdense_example.json +10 -0
  1022. teradataml/data/svmdensepredict_example.json +19 -0
  1023. teradataml/data/svmsparse_example.json +8 -0
  1024. teradataml/data/svmsparsepredict_example.json +14 -0
  1025. teradataml/data/svmsparsesummary_example.json +8 -0
  1026. teradataml/data/target_mobile_data.csv +13 -0
  1027. teradataml/data/target_mobile_data_dense.csv +5 -0
  1028. teradataml/data/target_udt_data.csv +8 -0
  1029. teradataml/data/tdnerextractor_example.json +14 -0
  1030. teradataml/data/templatedata.csv +1201 -0
  1031. teradataml/data/templates/open_source_ml.json +11 -0
  1032. teradataml/data/teradata_icon.ico +0 -0
  1033. teradataml/data/teradataml_example.json +1473 -0
  1034. teradataml/data/test_classification.csv +101 -0
  1035. teradataml/data/test_loan_prediction.csv +53 -0
  1036. teradataml/data/test_pacf_12.csv +37 -0
  1037. teradataml/data/test_prediction.csv +101 -0
  1038. teradataml/data/test_regression.csv +101 -0
  1039. teradataml/data/test_river2.csv +109 -0
  1040. teradataml/data/text_inputs.csv +6 -0
  1041. teradataml/data/textchunker_example.json +8 -0
  1042. teradataml/data/textclassifier_example.json +7 -0
  1043. teradataml/data/textclassifier_input.csv +7 -0
  1044. teradataml/data/textclassifiertrainer_example.json +7 -0
  1045. teradataml/data/textmorph_example.json +11 -0
  1046. teradataml/data/textparser_example.json +15 -0
  1047. teradataml/data/texttagger_example.json +12 -0
  1048. teradataml/data/texttokenizer_example.json +7 -0
  1049. teradataml/data/texttrainer_input.csv +11 -0
  1050. teradataml/data/tf_example.json +7 -0
  1051. teradataml/data/tfidf_example.json +14 -0
  1052. teradataml/data/tfidf_input1.csv +201 -0
  1053. teradataml/data/tfidf_train.csv +6 -0
  1054. teradataml/data/time_table1.csv +535 -0
  1055. teradataml/data/time_table2.csv +14 -0
  1056. teradataml/data/timeseriesdata.csv +1601 -0
  1057. teradataml/data/timeseriesdatasetsd4.csv +105 -0
  1058. teradataml/data/timestamp_data.csv +4 -0
  1059. teradataml/data/titanic.csv +892 -0
  1060. teradataml/data/titanic_dataset_unpivoted.csv +19 -0
  1061. teradataml/data/to_num_data.csv +4 -0
  1062. teradataml/data/tochar_data.csv +5 -0
  1063. teradataml/data/token_table.csv +696 -0
  1064. teradataml/data/train_multiclass.csv +101 -0
  1065. teradataml/data/train_regression.csv +101 -0
  1066. teradataml/data/train_regression_multiple_labels.csv +101 -0
  1067. teradataml/data/train_tracking.csv +28 -0
  1068. teradataml/data/trans_dense.csv +16 -0
  1069. teradataml/data/trans_sparse.csv +55 -0
  1070. teradataml/data/transformation_table.csv +6 -0
  1071. teradataml/data/transformation_table_new.csv +2 -0
  1072. teradataml/data/tv_spots.csv +16 -0
  1073. teradataml/data/twod_climate_data.csv +117 -0
  1074. teradataml/data/uaf_example.json +529 -0
  1075. teradataml/data/univariatestatistics_example.json +9 -0
  1076. teradataml/data/unpack_example.json +10 -0
  1077. teradataml/data/unpivot_example.json +25 -0
  1078. teradataml/data/unpivot_input.csv +8 -0
  1079. teradataml/data/url_data.csv +10 -0
  1080. teradataml/data/us_air_pass.csv +37 -0
  1081. teradataml/data/us_population.csv +624 -0
  1082. teradataml/data/us_states_shapes.csv +52 -0
  1083. teradataml/data/varmax_example.json +18 -0
  1084. teradataml/data/vectordistance_example.json +30 -0
  1085. teradataml/data/ville_climatedata.csv +121 -0
  1086. teradataml/data/ville_tempdata.csv +12 -0
  1087. teradataml/data/ville_tempdata1.csv +12 -0
  1088. teradataml/data/ville_temperature.csv +11 -0
  1089. teradataml/data/waveletTable.csv +1605 -0
  1090. teradataml/data/waveletTable2.csv +1605 -0
  1091. teradataml/data/weightedmovavg_example.json +9 -0
  1092. teradataml/data/wft_testing.csv +5 -0
  1093. teradataml/data/windowdfft.csv +16 -0
  1094. teradataml/data/wine_data.csv +1600 -0
  1095. teradataml/data/word_embed_input_table1.csv +6 -0
  1096. teradataml/data/word_embed_input_table2.csv +5 -0
  1097. teradataml/data/word_embed_model.csv +23 -0
  1098. teradataml/data/words_input.csv +13 -0
  1099. teradataml/data/xconvolve_complex_left.csv +6 -0
  1100. teradataml/data/xconvolve_complex_leftmulti.csv +6 -0
  1101. teradataml/data/xgboost_example.json +36 -0
  1102. teradataml/data/xgboostpredict_example.json +32 -0
  1103. teradataml/data/ztest_example.json +16 -0
  1104. teradataml/dataframe/__init__.py +0 -0
  1105. teradataml/dataframe/copy_to.py +2446 -0
  1106. teradataml/dataframe/data_transfer.py +2840 -0
  1107. teradataml/dataframe/dataframe.py +20908 -0
  1108. teradataml/dataframe/dataframe_utils.py +2114 -0
  1109. teradataml/dataframe/fastload.py +794 -0
  1110. teradataml/dataframe/functions.py +2110 -0
  1111. teradataml/dataframe/indexer.py +424 -0
  1112. teradataml/dataframe/row.py +160 -0
  1113. teradataml/dataframe/setop.py +1171 -0
  1114. teradataml/dataframe/sql.py +10904 -0
  1115. teradataml/dataframe/sql_function_parameters.py +440 -0
  1116. teradataml/dataframe/sql_functions.py +652 -0
  1117. teradataml/dataframe/sql_interfaces.py +220 -0
  1118. teradataml/dataframe/vantage_function_types.py +675 -0
  1119. teradataml/dataframe/window.py +694 -0
  1120. teradataml/dbutils/__init__.py +3 -0
  1121. teradataml/dbutils/dbutils.py +2871 -0
  1122. teradataml/dbutils/filemgr.py +318 -0
  1123. teradataml/gen_ai/__init__.py +2 -0
  1124. teradataml/gen_ai/convAI.py +473 -0
  1125. teradataml/geospatial/__init__.py +4 -0
  1126. teradataml/geospatial/geodataframe.py +1105 -0
  1127. teradataml/geospatial/geodataframecolumn.py +392 -0
  1128. teradataml/geospatial/geometry_types.py +926 -0
  1129. teradataml/hyperparameter_tuner/__init__.py +1 -0
  1130. teradataml/hyperparameter_tuner/optimizer.py +4115 -0
  1131. teradataml/hyperparameter_tuner/utils.py +303 -0
  1132. teradataml/lib/__init__.py +0 -0
  1133. teradataml/lib/aed_0_1.dll +0 -0
  1134. teradataml/lib/libaed_0_1.dylib +0 -0
  1135. teradataml/lib/libaed_0_1.so +0 -0
  1136. teradataml/lib/libaed_0_1_aarch64.so +0 -0
  1137. teradataml/lib/libaed_0_1_ppc64le.so +0 -0
  1138. teradataml/opensource/__init__.py +1 -0
  1139. teradataml/opensource/_base.py +1321 -0
  1140. teradataml/opensource/_class.py +464 -0
  1141. teradataml/opensource/_constants.py +61 -0
  1142. teradataml/opensource/_lightgbm.py +949 -0
  1143. teradataml/opensource/_sklearn.py +1008 -0
  1144. teradataml/opensource/_wrapper_utils.py +267 -0
  1145. teradataml/options/__init__.py +148 -0
  1146. teradataml/options/configure.py +489 -0
  1147. teradataml/options/display.py +187 -0
  1148. teradataml/plot/__init__.py +3 -0
  1149. teradataml/plot/axis.py +1427 -0
  1150. teradataml/plot/constants.py +15 -0
  1151. teradataml/plot/figure.py +431 -0
  1152. teradataml/plot/plot.py +810 -0
  1153. teradataml/plot/query_generator.py +83 -0
  1154. teradataml/plot/subplot.py +216 -0
  1155. teradataml/scriptmgmt/UserEnv.py +4273 -0
  1156. teradataml/scriptmgmt/__init__.py +3 -0
  1157. teradataml/scriptmgmt/lls_utils.py +2157 -0
  1158. teradataml/sdk/README.md +79 -0
  1159. teradataml/sdk/__init__.py +4 -0
  1160. teradataml/sdk/_auth_modes.py +422 -0
  1161. teradataml/sdk/_func_params.py +487 -0
  1162. teradataml/sdk/_json_parser.py +453 -0
  1163. teradataml/sdk/_openapi_spec_constants.py +249 -0
  1164. teradataml/sdk/_utils.py +236 -0
  1165. teradataml/sdk/api_client.py +900 -0
  1166. teradataml/sdk/constants.py +62 -0
  1167. teradataml/sdk/modelops/__init__.py +98 -0
  1168. teradataml/sdk/modelops/_client.py +409 -0
  1169. teradataml/sdk/modelops/_constants.py +304 -0
  1170. teradataml/sdk/modelops/models.py +2308 -0
  1171. teradataml/sdk/spinner.py +107 -0
  1172. teradataml/series/__init__.py +0 -0
  1173. teradataml/series/series.py +537 -0
  1174. teradataml/series/series_utils.py +71 -0
  1175. teradataml/store/__init__.py +12 -0
  1176. teradataml/store/feature_store/__init__.py +0 -0
  1177. teradataml/store/feature_store/constants.py +658 -0
  1178. teradataml/store/feature_store/feature_store.py +4814 -0
  1179. teradataml/store/feature_store/mind_map.py +639 -0
  1180. teradataml/store/feature_store/models.py +7330 -0
  1181. teradataml/store/feature_store/utils.py +390 -0
  1182. teradataml/table_operators/Apply.py +979 -0
  1183. teradataml/table_operators/Script.py +1739 -0
  1184. teradataml/table_operators/TableOperator.py +1343 -0
  1185. teradataml/table_operators/__init__.py +2 -0
  1186. teradataml/table_operators/apply_query_generator.py +262 -0
  1187. teradataml/table_operators/query_generator.py +493 -0
  1188. teradataml/table_operators/table_operator_query_generator.py +462 -0
  1189. teradataml/table_operators/table_operator_util.py +726 -0
  1190. teradataml/table_operators/templates/dataframe_apply.template +184 -0
  1191. teradataml/table_operators/templates/dataframe_map.template +176 -0
  1192. teradataml/table_operators/templates/dataframe_register.template +73 -0
  1193. teradataml/table_operators/templates/dataframe_udf.template +67 -0
  1194. teradataml/table_operators/templates/script_executor.template +170 -0
  1195. teradataml/telemetry_utils/__init__.py +0 -0
  1196. teradataml/telemetry_utils/queryband.py +53 -0
  1197. teradataml/utils/__init__.py +0 -0
  1198. teradataml/utils/docstring.py +527 -0
  1199. teradataml/utils/dtypes.py +943 -0
  1200. teradataml/utils/internal_buffer.py +122 -0
  1201. teradataml/utils/print_versions.py +206 -0
  1202. teradataml/utils/utils.py +451 -0
  1203. teradataml/utils/validators.py +3305 -0
  1204. teradataml-20.0.0.8.dist-info/METADATA +2804 -0
  1205. teradataml-20.0.0.8.dist-info/RECORD +1208 -0
  1206. teradataml-20.0.0.8.dist-info/WHEEL +5 -0
  1207. teradataml-20.0.0.8.dist-info/top_level.txt +1 -0
  1208. teradataml-20.0.0.8.dist-info/zip-safe +1 -0
@@ -0,0 +1,4115 @@
1
+ # ##################################################################
2
+ #
3
+ # Copyright 2025 Teradata. All rights reserved.
4
+ # TERADATA CONFIDENTIAL AND TRADE SECRET
5
+ #
6
+ # Primary Owner: Kesavaragavan B (kesavaragavan.b@Teradata.com)
7
+ # Secondary Owner: Pankaj Purandare (PankajVinod.Purandare@teradata.com),
8
+ # Pradeep Garre (pradeep.garre@teradata.com)
9
+ #
10
+ # This file implements Hyperparameter Tuning feature which is used for
11
+ # model optimization. Optimizer contains following algorithms
12
+ # GridSearch and RandomSearch for hyperaparameter tuning.
13
+ #
14
+ # ##################################################################
15
+
16
+ import numpy as np
17
+ import pandas as pd
18
+ import random
19
+ import time
20
+ import threading
21
+ from itertools import product
22
+ from collections import defaultdict
23
+ from teradataml import DataFrame, valib, TeradataMlException
24
+ from teradataml.common.messages import Messages, MessageCodes
25
+ from teradataml.hyperparameter_tuner.utils import _ProgressBar
26
+ from teradataml.utils.utils import _AsyncDBExecutor
27
+ from teradataml.utils.validators import _Validators
28
+ from teradataml.options.configure import configure
29
+ from teradataml.common.constants import TeradataConstants
30
+
31
+
32
+ class _BaseSearch:
33
+ """Base class for hyperparameter optimization."""
34
+
35
+ def __init__(self, func, params):
36
+ """
37
+ Constructor for _BaseSearch.
38
+ PARAMETERS:
39
+ func:
40
+ Required Argument.
41
+ Specifies a teradataml analytic function.
42
+ Types:
43
+ teradataml Analytic Functions
44
+ * Advanced analytic functions
45
+ * UAF
46
+ * VAL
47
+ Refer to display_analytic_functions()
48
+ function for list of functions.
49
+
50
+ params:
51
+ Optional Argument.
52
+ Specifies the parameter(s) of a teradataml function.
53
+ Types: dict
54
+
55
+ RAISES:
56
+ TeradataMlException, TypeError, ValueError
57
+
58
+ RETURNS:
59
+ None
60
+
61
+ EXAMPLES:
62
+
63
+ >>> # Let's initialize parameters for BaseSearch.
64
+ >>> func_params = {"data" : antiselect_input,
65
+ "exclude" : (['rowids','orderdate'], ['orderdate'])}
66
+
67
+ >>> # Create instance of _BaseSearch.
68
+ >>> bs_obj = _BaseSearch(func=Antiselect, params=func_params)
69
+ """
70
+
71
+ # Argument validation.
72
+ # Validate argument types.
73
+ awu_matrix = []
74
+ awu_matrix.append(["params", params, True, dict, True])
75
+ _Validators._validate_function_arguments(awu_matrix)
76
+
77
+ # Model trainer function supports evaluation.
78
+ self._SQLE_TRAINABLE_FUNCS = {"DecisionForest", "GLM", "GLMPerSegment",
79
+ "KMeans", "KNN", "OneClassSVM", "SVM", "XGBoost",
80
+ "NaiveBayesTextClassifierTrainer"}
81
+
82
+ # Data passed in fit method is sampled and internally test dataset
83
+ # is passed with following argument name for predictions and evaluation.
84
+ self._TRAINABLE_FUNCS_DATA_MAPPER = {"DecisionForest": "newdata", "GLM": "newdata",
85
+ "GLMPerSegment": "newdata", "KMeans": "data",
86
+ "KNN": "test_data", "OneClassSVM": "newdata",
87
+ "SVM": "newdata", "XGBoost": "newdata",
88
+ "NaiveBayesTextClassifierTrainer": "newdata",
89
+ "DecisionTree": "data", "KMeans": "data",
90
+ "LinReg": "data", "LogReg": "data", "PCA": "data",
91
+ "LinearRegression": "data", "Lasso": "data",
92
+ "Ridge": "data", "ARDRegression": "data",
93
+ "BayesianRidge": "data", "TweedieRegressor": "data",
94
+ "TheilSenRegressor": "data", "SGDRegressor": "data",
95
+ "RidgeCV": "data", "RANSACRegressor": "data",
96
+ "PoissonRegressor": "data", "PassiveAggressiveRegressor": "data",
97
+ "OrthogonalMatchingPursuitCV": "data", "OrthogonalMatchingPursuit": "data",
98
+ "MultiTaskLassoCV": "data", "MultiTaskLasso": "data",
99
+ "MultiTaskElasticNetCV": "data", "MultiTaskElasticNet": "data",
100
+ "LassoLarsIC": "data", "LassoLarsCV": "data", "LassoLars": "data",
101
+ "LassoCV": "data", "LarsCV": "data", "Lars": "data",
102
+ "HuberRegressor": "data", "GammaRegressor": "data",
103
+ "ElasticNetCV": "data", "ElasticNet": "data",
104
+ "LogisticRegression": "data", "RidgeClassifier": "data",
105
+ "RidgeClassifierCV": "data", "SGDClassifier": "data",
106
+ "PassiveAggressiveClassifier": "data", "Perceptron": "data",
107
+ "LogisticRegressionCV": "data"}
108
+
109
+ self._UAF_TRAINABLE_FUNCS = {"ArimaEstimate", "LinearRegr", "MAMean",
110
+ "MultivarRegr", "SimpleExp"}
111
+ self._VAL_TRAINABLE_FUNCS = {"DecisionTree", "KMeans", "LinReg", "LogReg", "PCA"}
112
+
113
+ # Unsupervised model trainer functions. These models are suitable
114
+ # for prediction rather than evaluation.
115
+ self.__US_TRAINABLE_FUNCS = {"KMeans", "OneClassSVM", "PCA"}
116
+
117
+ # Evaluation approach for model evaluable functions were "True" means
118
+ # higher the score is better, and vice versa.
119
+ self.__func_comparator = {'MAE': False,
120
+ 'MSE': False,
121
+ 'MSLE': False,
122
+ 'MAPE': False,
123
+ 'RMSE': False,
124
+ 'RMSLE': False,
125
+ 'ME': False,
126
+ 'R2': True,
127
+ 'EV': True,
128
+ 'MPE': False,
129
+ 'MPD': False,
130
+ 'MGD': False,
131
+ 'ACCURACY': True,
132
+ 'MICRO-PRECISION': True,
133
+ 'MICRO-RECALL': True,
134
+ 'MICRO-F1': True,
135
+ 'MACRO-PRECISION': True,
136
+ 'MACRO-RECALL': True,
137
+ 'MACRO-F1': True,
138
+ 'WEIGHTED-PRECISION': True,
139
+ 'WEIGHTED-RECALL': True,
140
+ 'WEIGHTED-F1': True,
141
+ 'SILHOUETTE': True,
142
+ 'CALINSKI': True,
143
+ 'DAVIES': True}
144
+
145
+ # OpenSource ML function comparator (excluding MPD, MGD, MTD, RMSE, RMSLE)
146
+ self.__osml_func_comparator = {k: v for k, v in self.__func_comparator.items()
147
+ if k not in ['MPD', 'MGD', 'MTD', 'RMSE', 'RMSLE']}
148
+
149
+ # Linear model categorization lists for sklearn models
150
+ self._LINEAR_REGRESSION_MODELS = {
151
+ "ARDRegression", "BayesianRidge", "TweedieRegressor", "TheilSenRegressor",
152
+ "SGDRegressor", "RidgeCV", "Ridge", "RANSACRegressor", "PoissonRegressor",
153
+ "PassiveAggressiveRegressor", "OrthogonalMatchingPursuitCV", "OrthogonalMatchingPursuit",
154
+ "MultiTaskLassoCV", "MultiTaskLasso", "MultiTaskElasticNetCV", "MultiTaskElasticNet",
155
+ "LinearRegression", "LassoLarsIC", "LassoLarsCV", "LassoLars", "LassoCV",
156
+ "Lasso", "LarsCV", "Lars", "HuberRegressor", "GammaRegressor",
157
+ "ElasticNetCV", "ElasticNet"
158
+ }
159
+
160
+ self._LINEAR_CLASSIFICATION_MODELS = {
161
+ "SGDClassifier", "RidgeClassifierCV", "RidgeClassifier", "Perceptron",
162
+ "PassiveAggressiveClassifier", "LogisticRegressionCV", "LogisticRegression"
163
+ }
164
+
165
+ self._CLUSTERING_MODELS = {
166
+ "KMeans", "GaussianMixture"
167
+ }
168
+ self.__func = func
169
+ self.__params = params
170
+ # "self.__best_model" contains best model.
171
+ self.__best_model = None
172
+ # "self.__evaluation_metric" contains evaluation metric considered for
173
+ # evaluation.
174
+ self.__evaluation_metric = None
175
+ # "self.__eval_params" contains evaluation parameter will be used for
176
+ # trained model evaluation.
177
+ self.__eval_params = None
178
+ # "self.__early_stop" contains expected evaluation value considered for
179
+ # evaluation.
180
+ self.__early_stop = None
181
+ # "self._parameter_grid" contains parameter combinations.
182
+ self._parameter_grid = None
183
+ # "self.__best_score_" contains best model score.
184
+ self.__best_score_ = None
185
+ # "self.__best_model_id" contains best model ID.
186
+ self.__best_model_id = None
187
+ # "self.__best_params_" contains best model parameters.
188
+ self.__best_params_ = None
189
+ # "__model_stats" contains "model_id" and corresponding evaluation
190
+ # metrics as a DataFrame.
191
+ self.__model_stats = None
192
+ # "self.__models" contains "model_id", "params", "accuracy", and "status"
193
+ # will be stored as a DataFrame.
194
+ self.__models = None
195
+ # HPT complete execution results including "model_stats" informations recorded.
196
+ self.__model_eval_records = list()
197
+ # "self.__trained_models" is an internal attribute to keep track of
198
+ # "model_id" and the associated function objects.
199
+ self.__trained_models = dict()
200
+ # "__train_data" contains training data for model trainer and unsupervised
201
+ # model trainer functions.
202
+ self.__train_data = None
203
+ # "__test_data" contains testing data for model trainer function.
204
+ self.__test_data = None
205
+ # Default model will be used for predict and evaluate after HPT execution.
206
+ self.__default_model = None
207
+ # 'self.__is_finite' will indicate whether the chosen '__evaluation_metric'
208
+ # contains 'NaN', '-inf' or 'inf' values.
209
+ self.__is_finite = True
210
+ # '__is_fit_called' specifies whether a fit method is called by user.
211
+ # This helps 'is_running' method to identify the model training state.
212
+ self.__is_fit_called = False
213
+ # "__model_trainer_input_data" contains the model trainer data when input data is passed along with params.
214
+ self.__model_trainer_input_data = None
215
+ # Constant name for data identifier.
216
+ self.__DATA_ID = "data_id"
217
+ # '__progress_bar' holds progress bar obj when verbose is set.
218
+ self.__progress_bar = None
219
+ # '__model_err_records' holds error messages of failed model.
220
+ self.__model_err_records = dict()
221
+ # '__parallel_stop_event' is used to stop threads in parallel execution.
222
+ self.__parallel_stop_event = None
223
+
224
+
225
+ # Set the function feature type and supported functionality.
226
+ self.__is_sqle_function = False
227
+ self.__is_uaf_function = False
228
+ self.__is_val_function = True if "valib" in str(self.__func.__module__)\
229
+ else False
230
+ self.__is_opensource_model = False
231
+ self.__is_clustering_model = False
232
+ self.__is_regression_model = False
233
+ self.__is_classification_model = False
234
+ self.model_id_counter = {}
235
+
236
+ # Import sklearn wrapper class for proper type checking
237
+ from teradataml.opensource._sklearn import _SkLearnObjectWrapper
238
+
239
+ if hasattr(func, "modelObj") and isinstance(func, _SkLearnObjectWrapper):
240
+ self.__is_opensource_model = True
241
+ self.__is_trainable = True
242
+ self.__is_evaluatable = True
243
+ self.__is_predictable = True
244
+
245
+ # Set the function name and class
246
+ self.__func_name = func.modelObj.__class__.__name__ # e.g., 'KMeans'
247
+ self.__func = func.__class__
248
+ if self.__func_name in self._CLUSTERING_MODELS:
249
+ self.__is_clustering_model = True
250
+ self.__is_evaluatable = False
251
+ elif self.__func_name in self._LINEAR_REGRESSION_MODELS:
252
+ self.__is_regression_model = True
253
+ elif self.__func_name in self._LINEAR_CLASSIFICATION_MODELS:
254
+ self.__is_classification_model = True
255
+ else:
256
+ self.__func_name = func._tdml_valib_name if "_VALIB" in str(func.__class__) \
257
+ else func.__name__
258
+ if self.__func_name in self._VAL_TRAINABLE_FUNCS and self.__is_val_function:
259
+ # TODO: Enable these feature once merge model supports VAL functions.
260
+ # This case is for VAL model trainer functions.
261
+ self.__is_trainable = self.__is_evaluatable = \
262
+ self.__is_predictable = False
263
+ elif self.__func_name in self._UAF_TRAINABLE_FUNCS:
264
+ # TODO: Enable these feature once merge model supports UAF functions.
265
+ # This case is for UAF model trainer functions.
266
+ self.__is_uaf_function = self.__is_trainable = \
267
+ self.__is_evaluatable = False
268
+ self.__is_predictable = False
269
+ elif self.__func_name in self._SQLE_TRAINABLE_FUNCS:
270
+ # This case is for SQLE model trainer functions.
271
+ self.__is_sqle_function = self.__is_trainable = \
272
+ self.__is_evaluatable = self.__is_predictable = True
273
+ else:
274
+ # This case is for non-model trainer functions.
275
+ self.__is_trainable = self.__is_evaluatable = \
276
+ self.__is_predictable = False
277
+
278
+ self.__is_evaluatable = False if not self.__is_evaluatable or \
279
+ self.__func_name in self.__US_TRAINABLE_FUNCS else \
280
+ True
281
+ # Set train routine based on model type.
282
+ # Non-model trainer routine is used for unsupervised model function training.
283
+ self._execute_fit = self.__model_trainer_routine if self.__is_trainable \
284
+ and (self.__is_evaluatable or self.__is_clustering_model) else \
285
+ self.__non_model_trainer_routine
286
+
287
+ # Utility lambda functions.
288
+ # '_is_best_metrics' function is to check whether current trained model
289
+ # evaluation value is better than existing "self.__best_model" score.
290
+ self._is_best_metrics = lambda curr_score: curr_score > self.__best_score_ \
291
+ if self.__func_comparator[self.__evaluation_metric] \
292
+ else curr_score < self.__best_score_
293
+ # '_is_early_stoppable' function is to check whether HPT execution reached
294
+ # "self.__early_stop" value.
295
+ self._is_early_stoppable = lambda : self.__best_score_ >= self.__early_stop \
296
+ if self.__func_comparator[self.__evaluation_metric] \
297
+ else self.__best_score_ <= self.__early_stop
298
+
299
+ # '_is_time_stoppable' function is to check whether HPT execution reached self.__timeout value.
300
+ self._is_time_stoppable = lambda : True if time.time() - self.__start_time >= self.__timeout else False
301
+
302
+ # Special case comparator for "MPE" metrics.
303
+ # When "curr_score" argument is 'None' then lambda function checks
304
+ # for '_is_early_stoppable'. Otherwise, it checks for '_is_best_metrics'.
305
+ self._spl_abs_comparator = lambda curr_score=None: \
306
+ abs(curr_score) < abs(self.__best_score_) \
307
+ if curr_score is not None else \
308
+ abs(self.__best_score_) <= abs(self.__early_stop)
309
+
310
+ # '_generate_model_name' function is used to create new model name
311
+ # for every iteration.
312
+ self._generate_model_name = lambda iter: "{}_{}".format(\
313
+ self.__func_name.upper(), str(iter))
314
+
315
+ # '__is_model_training_completed' function to check whether all models are
316
+ # executed based on model evaluation records. Function returns true, when all
317
+ # models are executed and evaluation reports are updated. Otherwise,
318
+ # returns false.
319
+ self.__is_model_training_completed = lambda : self.__is_fit_called and \
320
+ len(self.__model_eval_records) < \
321
+ len(self._parameter_grid)
322
+
323
+ # '_generate_dataframe_name' function is used to create new dataframe ID
324
+ # for given iteration.
325
+ self._generate_dataframe_name = lambda df_name, iter: "{}_{}".format(df_name, str(iter))
326
+
327
+ # '_get_train_data_arg' function is used to return model trainer function
328
+ # train argument name.
329
+ self._get_model_trainer_train_data_arg = lambda : "train_data" if \
330
+ self.__func_name == "KNN" else "data"
331
+
332
+ # '_get_predict_column' function is used to generate prediction column name.
333
+ self._get_predict_column = lambda: f"{self.__func_name.lower()}_predict_1"
334
+
335
+ if self.__is_trainable and "data" in self.__params:
336
+ data = self.__params.pop("data")
337
+ self.__validate_model_trainer_input_data_argument(data, False)
338
+ self.__model_trainer_input_data = data
339
+
340
+
341
+ def set_parameter_grid(self):
342
+ """
343
+ DESCRIPTION:
344
+ Set the value of the attribute _parameter_grid.
345
+
346
+ RETURNS:
347
+ None
348
+
349
+ EXAMPLES:
350
+ >>> self.set_parameter_grid()
351
+ """
352
+ self._parameter_grid = self.__populate_parameter_grid()
353
+ def get_parameter_grid(self):
354
+ """
355
+ DESCRIPTION:
356
+ Returns the value of the attribute _parameter_grid.
357
+
358
+ RETURNS:
359
+ dict
360
+
361
+ EXAMPLES:
362
+ >>> # Create an instance of the search algorithm called "optimizer_obj"
363
+ >>> # by referring "__init__()" method.
364
+ >>> # Perform "fit()" method on the optimizer_obj to populate model records.
365
+ >>> # Retrieve parameter grid.
366
+ >>> optimizer_obj.get_parameter_grid()
367
+ [{'param': {'input_columns': ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms',
368
+ 'Population', 'AveOccup', 'Latitude', 'Longitude'],
369
+ 'response_column': 'MedHouseVal', 'model_type': 'regression',
370
+ 'batch_size': 75, 'iter_max': 100, 'lambda1': 0.1, 'alpha': 0.5,
371
+ 'iter_num_no_change': 60, 'tolerance': 0.01, 'intercept': False,
372
+ 'learning_rate': 'INVTIME', 'initial_data': 0.5, 'decay_rate': 0.5,
373
+ 'momentum': 0.6, 'nesterov': True, 'local_sgd_iterations': 1,
374
+ 'data': '"ALICE"."ml__select__1696593660430612"'},
375
+ 'data_id': 'DF_0'},
376
+ {'param': {'input_columns': ['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms',
377
+ 'Population', 'AveOccup', 'Latitude', 'Longitude'],
378
+ 'response_column': 'MedHouseVal', 'model_type': 'regression',
379
+ 'batch_size': 75, 'iter_max': 100, 'lambda1': 0.1, 'alpha': 0.5,
380
+ 'iter_num_no_change': 60, 'tolerance': 0.01, 'intercept': False,
381
+ 'learning_rate': 'INVTIME', 'initial_data': 0.5, 'decay_rate': 0.5,
382
+ 'momentum': 0.6, 'nesterov': True, 'local_sgd_iterations': 1,
383
+ 'data': '"ALICE"."ml__select__1696593660430612"'},
384
+ 'data_id': 'DF_1'}]
385
+ """
386
+ return self._parameter_grid
387
+
388
    @property
    def models(self):
        """
        DESCRIPTION:
            Returns the generated models metadata as a pandas DataFrame with
            one row per trained model: MODEL_ID, (DATA_ID for trainable
            functions), PARAMETERS, STATUS and, when an evaluation metric is
            set, that metric's column.

        RETURNS:
            pandas DataFrame

        EXAMPLES:
            >>> # Create an instance of the search algorithm called "optimizer_obj"
            >>> # by referring "__init__()" method.
            >>> # Perform "fit()" method on the optimizer_obj to populate model records.
            >>> # Retrieve models metadata.
            >>> optimizer_obj.models
              MODEL_ID DATA_ID                                         PARAMETERS STATUS       MAE
            0    SVM_3    DF_0  {'input_columns': ['MedInc', 'HouseAge', 'AveR...   PASS  2.616772
            1    SVM_0    DF_0  {'input_columns': ['MedInc', 'HouseAge', 'AveR...   PASS  2.660815
        """
        # All the models are stored in a dictionary '__model_eval_records'. Since
        # "models" return a pandas DataFrame, one has to construct pandas DataFrame
        # from "__models". This construction should be done only if it is
        # appropriate, i.e., when a new model is pushed to "__model_eval_records",
        # only then construct the pandas Dataframe for models. Otherwise, store
        # it and use it. Check a new model record is generated or not by
        # comparing the number of model records present in '__model_eval_records'
        # with existing number of records in '__models'.
        _is_models_updated = self.__models is None or \
                             len(self.__model_eval_records) != self.__models.shape[0]

        # Update the '__models' when model records are updated.
        if _is_models_updated :
            # Set the '__models' variable with models metadata.

            # Set the columns based on teradataml analytics function type.
            _df_cols = ["MODEL_ID", "PARAMETERS", "STATUS"]

            # DATA_ID is only meaningful for model trainer functions.
            if self.__is_trainable:
                _df_cols.insert(1, self.__DATA_ID.upper())

            # Include evaluation metrics for model trainer functions.
            if self.__evaluation_metric:
                _df_cols.append(self.__evaluation_metric)

            # Replace the teradataml DataFrame with 'table_name'.
            # Convert "PARAMETERS" from dictionary to string datatype.
            # NOTE: this mutates '__model_eval_records' in place — once a
            # record's PARAMETERS dict has been stringified here, the original
            # dict (and DataFrame objects in it) are no longer recoverable
            # from the records.
            for index, records in enumerate(self.__model_eval_records):
                # Check whether "PARAMETERS" record contains a dictionary parameter.
                # (Already-stringified records from a previous access are skipped.)
                if isinstance(records["PARAMETERS"], dict):
                    # Replace the dataframe with table name and typecast the type
                    # of model training parameters to string.
                    for key, value in records["PARAMETERS"].items():
                        if isinstance(value, DataFrame):
                            records["PARAMETERS"][key] = \
                                value._table_name
                    records["PARAMETERS"] = str(records["PARAMETERS"])

            # Create pandas dataframe for recorded evaluation report.
            self.__models = pd.DataFrame(self.__model_eval_records,
                                         columns=_df_cols)

        return self.__models
455
+
456
+ @property
457
+ def best_score_(self):
458
+ """
459
+ DESCRIPTION:
460
+ Returns the best score of the model out of all generated models.
461
+ Note:
462
+ "best_score_" is not supported for non-model trainer functions.
463
+
464
+ RETURNS:
465
+ String representing the best score.
466
+
467
+ EXAMPLES:
468
+ >>> # Create an instance of the search algorithm called "optimizer_obj"
469
+ >>> # by referring "__init__()" method.
470
+ >>> # Perform "fit()" method on the optimizer_obj to populate model records.
471
+ >>> # Retrieve the best score.
472
+ >>> optimizer_obj.best_score_
473
+ 2.060386
474
+ """
475
+ return self.__best_score_
476
+
477
+ @property
478
+ def best_model_id(self):
479
+ """
480
+ DESCRIPTION:
481
+ Returns the model id of the model with best score.
482
+ Note:
483
+ "best_model_id" is not supported for non-model trainer functions.
484
+
485
+ RETURNS:
486
+ String representing the best model id.
487
+
488
+ EXAMPLES:
489
+ >>> # Create an instance of the search algorithm called "optimizer_obj"
490
+ >>> # by referring "__init__()" method.
491
+ >>> # Perform "fit()" method on the optimizer_obj to populate model records.
492
+ >>> # Retrieve the best model id.
493
+ >>> optimizer_obj.best_model_id
494
+ 'SVM_2'
495
+ """
496
+ return self.__best_model_id
497
+
498
+ @property
499
+ def best_params_(self):
500
+ """
501
+ DESCRIPTION:
502
+ Returns the parameters used for the model with best score.
503
+ Note:
504
+ "best_params_" is not supported for non-model trainer functions.
505
+
506
+ RETURNS:
507
+ dict
508
+
509
+ EXAMPLES:
510
+ >>> # Create an instance of the search algorithm called "optimizer_obj"
511
+ >>> # by referring "__init__()" method.
512
+ >>> # Perform "fit()" method on the optimizer_obj to populate model records.
513
+ >>> # Retrieve the best parameters.
514
+ >>> optimizer_obj.best_params_
515
+ {'input_columns': ['MedInc', 'HouseAge', 'AveRooms',
516
+ 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude'],
517
+ 'response_column': 'MedHouseVal', 'model_type': 'regression',
518
+ 'batch_size': 50, 'iter_max': 301, 'lambda1': 0.1, 'alpha': 0.5,
519
+ 'iter_num_no_change': 60, 'tolerance': 0.01, 'intercept': False,
520
+ 'learning_rate': 'INVTIME', 'initial_data': 0.5, 'decay_rate': 0.5,
521
+ 'momentum': 0.6, 'nesterov': True, 'local_sgd_iterations': 1,
522
+ 'data': '"ALICE"."ml__select__1696595493985650"'}
523
+ """
524
+ return self.__best_params_
525
+
526
+ @property
527
+ def best_model(self):
528
+ """
529
+ DESCRIPTION:
530
+ Returns the best trained model obtained from hyperparameter tuning.
531
+ Note:
532
+ "best_model" is not supported for non-model trainer functions.
533
+
534
+ RETURNS:
535
+ object of trained model.
536
+
537
+ EXAMPLES:
538
+ >>> # Create an instance of the search algorithm called "optimizer_obj"
539
+ >>> # by referring "__init__()" method.
540
+ >>> # Perform "fit()" method on the optimizer_obj to populate model records.
541
+ >>> # Retrieve the best model.
542
+ >>> optimizer_obj.best_model
543
+ ############ output_data Output ############
544
+
545
+ iterNum loss eta bias
546
+ 0 3 2.060386 0.028868 0.0
547
+ 1 5 2.055509 0.022361 0.0
548
+ 2 6 2.051982 0.020412 0.0
549
+ 3 7 2.048387 0.018898 0.0
550
+ 4 9 2.041521 0.016667 0.0
551
+ 5 10 2.038314 0.015811 0.0
552
+ 6 8 2.044882 0.017678 0.0
553
+ 7 4 2.058757 0.025000 0.0
554
+ 8 2 2.065932 0.035355 0.0
555
+ 9 1 1.780877 0.050000 0.0
556
+
557
+
558
+ ############ result Output ############
559
+
560
+ predictor estimate value
561
+ attribute
562
+ 7 Latitude 0.155095 None
563
+ -9 Learning Rate (Initial) 0.050000 None
564
+ -17 OneClass SVM NaN FALSE
565
+ -14 Epsilon 0.100000 None
566
+ 5 Population 0.000000 None
567
+ -12 Nesterov NaN TRUE
568
+ -5 BIC 73.297397 None
569
+ -7 Alpha 0.500000 Elasticnet
570
+ -3 Number of Observations 55.000000 None
571
+ 0 (Intercept) 0.000000 None
572
+
573
+ """
574
+ return self.__best_model
575
+
576
+ @property
577
+ def best_sampled_data_(self):
578
+ """
579
+ DESCRIPTION:
580
+ Returns the best sampled data used for training the best model.
581
+ Note:
582
+ "best_sampled_data_" is not supported for non-model trainer functions.
583
+
584
+ RETURNS:
585
+ list of DataFrames.
586
+
587
+ EXAMPLES:
588
+ >>> # Create an instance of the search algorithm called "optimizer_obj"
589
+ >>> # by referring "__init__()" method.
590
+ >>> # Perform "fit()" method on the optimizer_obj to populate model records.
591
+ >>> # Retrieve the best sampled data.
592
+ >>> optimizer_obj.best_sampled_data_
593
+ [{'data': id MedHouseVal MedInc HouseAge AveRooms AveBedrms Population AveOccup Latitude Longitude
594
+ 0 5233 0.955 -0.895906 0.680467 -0.387272 -0.202806 -0.125930 2.130214 -0.754303 0.653775
595
+ 1 10661 3.839 2.724825 -1.258313 0.876263 -1.142947 -0.751004 -0.187396 -0.878298 0.852744
596
+ 2 10966 1.896 0.057849 0.343287 -0.141762 -0.664624 -0.095545 0.588981 -0.829586 0.815727
597
+ 3 3687 1.741 -0.383816 -1.679787 -0.849458 0.108000 0.718354 1.083500 -0.630308 0.593621
598
+ 4 7114 2.187 -0.245392 0.258993 0.225092 -0.205781 -0.171508 -0.035650 -0.763160 0.755573
599
+ 5 5300 3.500 -0.955800 -1.005429 -1.548811 -0.130818 2.630473 -0.601956 -0.696734 0.556604
600
+ 6 686 1.578 -0.152084 -0.078186 -0.625426 -0.513581 -0.685892 -0.533101 0.906345 -1.141575
601
+ 7 9454 0.603 -1.109609 -0.499660 0.355748 0.379188 -0.364674 -0.356799 1.827451 -1.655193
602
+ 8 5202 1.000 -0.307539 1.101940 -0.379623 -0.570271 -0.141123 0.595366 -0.754303 0.635266
603
+ 9 5769 2.568 -0.413546 0.343287 -0.922324 -0.028824 1.165456 0.031374 -0.656879 0.626012},
604
+ {'newdata': id MedHouseVal MedInc HouseAge AveRooms AveBedrms Population AveOccup Latitude Longitude
605
+ 0 1754 1.651 -0.026315 0.596172 0.454207 -0.027273 0.068320 -0.082765 1.017055 -1.234118
606
+ 1 3593 2.676 1.241775 0.090403 1.024283 -0.367626 -0.045626 0.252048 -0.621452 0.542722
607
+ 2 7581 1.334 -0.714880 -1.258313 -0.604140 -0.259612 3.058041 0.857406 -0.776445 0.658402
608
+ 3 8783 2.500 -0.170156 0.596172 0.163717 0.398242 -0.668529 -0.728130 -0.820729 0.621385
609
+ 4 5611 1.587 -0.712366 -0.415366 -1.275716 0.012960 0.860515 0.764870 -0.820729 0.639893
610
+ 5 244 1.117 -0.605796 1.101940 -0.160367 0.426668 1.022209 1.041018 0.946201 -1.187846}]
611
+ """
612
+ return self.__sampled_df_mapper[self.__best_data_id]
613
+
614
+ @property
615
+ def best_data_id(self):
616
+ """
617
+ DESCRIPTION:
618
+ Returns the "data_id" of a sampled data used for training the best model.
619
+ Note:
620
+ "best_data_id" is not supported for non-model trainer functions.
621
+
622
+ RETURNS:
623
+ String representing the best "data_id"
624
+
625
+ EXAMPLES:
626
+ >>> # Create an instance of the search algorithm called "optimizer_obj"
627
+ >>> # by referring "__init__()" method.
628
+ >>> # Perform "fit()" method on the optimizer_obj to populate model records.
629
+ >>> # Retrieve the best data id.
630
+ >>> optimizer_obj.best_data_id
631
+ DF_0
632
+ """
633
+ return self.__best_data_id
634
+
635
+ @property
636
+ def model_stats(self):
637
+ """
638
+ DESCRIPTION:
639
+ Returns the model statistics of the model with best score.
640
+
641
+ RETURNS:
642
+ pandas DataFrame.
643
+
644
+ EXAMPLES:
645
+ >>> # Create an instance of the search algorithm called "optimizer_obj"
646
+ >>> # by referring "__init__()" method.
647
+ >>> # Perform "fit()" method on the optimizer_obj to populate model records.
648
+ >>> # Retrieve the model stats.
649
+ >>> optimizer_obj.model_stats
650
+ MODEL_ID DATA_ID PARAMETERS STATUS MAE
651
+ 0 SVM_3 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.616772`
652
+ 1 SVM_0 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.660815
653
+ 2 SVM_1 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.660815
654
+ 3 SVM_2 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.616772
655
+ 4 SVM_4 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.616772
656
+ 5 SVM_5 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.616772`
657
+
658
+ """
659
+
660
+ if not (self.__is_evaluatable or self.__is_clustering_model):
661
+ # Raise error when "model_stats" attribute accessed for non-executable
662
+ # functions.
663
+ err = Messages.get_message(MessageCodes.EXECUTION_FAILED,
664
+ "retrieve 'model_stats' attribute",
665
+ "'model_stats' attribute not applicable "\
666
+ "for non-evaluatable function.")
667
+ raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
668
+ elif len(self.__model_eval_records) == 0:
669
+ # Raise error when no records are found.
670
+ err = Messages.get_message(MessageCodes.EXECUTION_FAILED,
671
+ "retrieve 'model_stats' attribute", \
672
+ "No records found in 'model_stats' " \
673
+ "attribute.")
674
+ raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
675
+
676
+
677
+ # All the models records are stored in a dictionary '__model_eval_records'.
678
+ # Since "model_stats" return a pandas DataFrame, one has to construct
679
+ # pandas DataFrame from "__model_stats". This construction should be done
680
+ # only if it is appropriate, i.e., when a new model record is pushed to
681
+ # "__model_eval_records", only then construct the pandas Dataframe for
682
+ # model_stats. Otherwise, store it and use it. Check a new model record is
683
+ # generated or not by comparing the number of model records present in
684
+ # '__model_eval_records' with existing number of records in '__model_stats'.
685
+ _is_model_stats_updated = self.__model_stats is None or \
686
+ len(self.__model_eval_records) != \
687
+ self.__model_stats.shape[0]
688
+
689
+ # Update the '__models' when model stats records are updated.
690
+ if _is_model_stats_updated:
691
+ # Set the '__model_stats' with model evaluation report.
692
+
693
+ # Exclude "models" attribute specific columns.
694
+ _df_cols = ["PARAMETERS", "STATUS", self.__DATA_ID.upper()]
695
+
696
+ # Create pandas dataframe for recorded evaluation report by excluding
697
+ # 'PARAMETERS' and 'STATUS' columns.
698
+ self.__model_stats = pd.DataFrame(self.__model_eval_records).drop(\
699
+ columns=_df_cols, axis=1)
700
+
701
+ return self.__model_stats
702
+
703
+ def is_running(self):
704
+ """
705
+ DESCRIPTION:
706
+ Check whether hyperparameter tuning is completed or not. Function
707
+ returns True when execution is in progress. Otherwise it returns False.
708
+
709
+ PARAMETERS:
710
+ None
711
+
712
+ RAISES:
713
+ None
714
+
715
+ RETURNS:
716
+ bool
717
+
718
+ EXAMPLES:
719
+ >>> # Create an instance of the search algorithm called "optimizer_obj"
720
+ >>> # by referring "__init__()" method.
721
+ >>> # Perform "fit()" method on the optimizer_obj to populate model records.
722
+ >>> # Retrieve the model execution status.
723
+ >>> optimizer_obj.is_running()
724
+ False
725
+ """
726
+ # Check all models are executed based on model training records count.
727
+ # Note: Model training records is updated at the end of execution and
728
+ # list append operation is thread-safe. Hence, following method works for
729
+ # both parallel and sequential execution.
730
+ return self.__is_model_training_completed()
731
+
732
+ def _add_data_label(self, arg_name=None):
733
+ """
734
+ DESCRIPTION:
735
+ Internal function to label the teradataml DataFrame for model trainer
736
+ functions. Labels will be added for input data except dictionary
737
+ formatted DataFrame. Since, Dictionary formatted DataFrame contains
738
+ custom data labels.
739
+
740
+ PARAMETERS:
741
+ arg_name:
742
+ Optional Argument.
743
+ Specifies the model trainer argument name for unsupervised
744
+ model trainer functions.
745
+ Notes:
746
+ * "arg_name" argument is not supported for model-trainer functions
747
+ (evaluatable functions). Since, argument names are
748
+ added in data sampling method.
749
+ * "arg_name" is added to training data of unsupervised
750
+ model-trainer functions.
751
+ Types: str
752
+
753
+ RETURNS:
754
+ dictionary
755
+
756
+ RAISES:
757
+ None
758
+
759
+ EXAMPLES:
760
+ >>> # Example 1: tuple of DataFrame is passed.
761
+ >>> # Assign DataFrames to be labeled.
762
+ >>> self.__model_trainer_input_data = (DF1, DF2)
763
+ >>> # Call '_add_data_label' method for labelling.
764
+ >>> self._add_data_label()
765
+ {'DF_0': DF1, 'DF_1': DF2}
766
+
767
+ >>> # Example 2: Dictionary of DataFrame is passed.
768
+ >>> # This test case is specific to unsupervised
769
+ >>> # model trainer functions.
770
+ >>> # Assign labelled dataframes.
771
+ >>> self.__model_trainer_input_data = {"data-1":DF1, "data-2":DF2}
772
+ >>> # Call '_add_data_label' method to add argument name and reframe
773
+ >>> # the structure into generic labelled format.
774
+ >>> self._add_data_label(arg_name="data")
775
+ {"data-1": {'data': DF1}, "data-2": {'data': DF2} }
776
+
777
+ >>> # Example 3: Tuple of DataFrame is passed.
778
+ >>> # This test case is specific to unsupervised
779
+ >>> # model trainer functions.
780
+ >>> # Assign labelled dataframes.
781
+ >>> self.__model_trainer_input_data = (DF1, DF2)
782
+ >>> # Call '_add_data_label' method to add argument name and data
783
+ >>> # labels. Resulting structure contains unique data labels
784
+ >>> # and dictionary formatted.
785
+ >>> # Assign labels for dataframes with data argument name.
786
+ >>> self._add_data_label(arg_name="data")
787
+ {"DF_0": {'data': DF1}, "DF_1": {'data': DF2} }
788
+
789
+ >>> # Example 4: Single DataFrame is passed.
790
+ >>> # Assign DataFrames to be labeled.
791
+ >>> self.__model_trainer_input_data = DF1
792
+ >>> # Call '_add_data_label' method for labelling.
793
+ >>> self._add_data_label()
794
+ {'DF_0': DF1}
795
+ """
796
+
797
+ _labeled_data = {}
798
+
799
+ if isinstance(self.__model_trainer_input_data, DataFrame):
800
+ # Provide default data identifier "DF_0", when
801
+ # '__model_trainer_input_data' contains single DataFrame.
802
+ _df_id = self._generate_dataframe_name("DF",0)
803
+ # Record labeled data using unique data identifier.
804
+ # Note: "arg_name" is added to data of unsupervised model-trainer
805
+ # functions while adding data identifier.
806
+ _labeled_data[_df_id] = self.__model_trainer_input_data if arg_name \
807
+ is None else {arg_name: \
808
+ self.__model_trainer_input_data}
809
+ elif isinstance(self.__model_trainer_input_data, tuple):
810
+ # Assign default data identifier sequence, when
811
+ # '__model_trainer_input_data' contains tuples of DataFrame.
812
+ for _index, _data in enumerate(self.__model_trainer_input_data):
813
+ _df_id = self._generate_dataframe_name("DF",_index)
814
+ # Record labeled data using unique data identifier.
815
+ # Note: "arg_name" is added to data of unsupervised model-trainer
816
+ # functions while adding data identifier.
817
+ _labeled_data[_df_id] = _data if arg_name is None else \
818
+ {arg_name: _data}
819
+ elif isinstance(self.__model_trainer_input_data, dict) and arg_name:
820
+ # This condition updates unsupervised model trainer functions data.
821
+ # Assign "arg_name" to all the data items when
822
+ # '__model_trainer_input_data' contains dictionary format DataFrame.
823
+ # Note: Dictionary keys specifies data identifier (labels) and
824
+ # values specifies DataFrame (training data).
825
+ for _data_id in self.__model_trainer_input_data:
826
+ _arg_name_added = {arg_name: self.__model_trainer_input_data[_data_id]}
827
+ _labeled_data[_data_id] = _arg_name_added
828
+
829
+ return _labeled_data
830
+
831
+ def __perform_train_test_sampling(self, data, frac, stratify_column=None,
832
+ sample_id_column=None, sample_seed=None):
833
+ """
834
+ DESCRIPTION:
835
+ Internal function to perform train test split for multiple DataFrame.
836
+ Train Test split is use 80/20 method for sampling train and test
837
+ DataFrame. After sampling, parameter grid is updated with the train
838
+ and test DataFrame.
839
+
840
+ Notes:
841
+ * Sampled DataFrames are stored in following format.
842
+ [<Train_DF>, <Test_DF>]
843
+ * Each sampled DataFrame mapped with unique data identifier.
844
+
845
+ PARAMETERS:
846
+ data:
847
+ Required Argument.
848
+ Specifies the teradataml DataFrame needs to be sampled.
849
+ Types: dictionary of DataFrame.
850
+
851
+ frac:
852
+ Required Argument.
853
+ Specifies the split percentage of rows to be sampled for training
854
+ and testing dataset. "frac" argument value must range between (0, 1).
855
+ Notes:
856
+ * This "frac" argument is not supported for non-model trainer
857
+ function.
858
+ * The "frac" value is considered as train split percentage and
859
+ The remaining percentage is taken into account for test splitting.
860
+ Types: float
861
+
862
+ sample_seed:
863
+ Optional Argument.
864
+ Specifies the seed value that controls the shuffling applied
865
+ to the data before applying the Train-Test split. Pass an int for
866
+ reproducible output across multiple function calls.
867
+ Notes:
868
+ * When the argument is not specified, different
869
+ runs of the query generate different outputs.
870
+ * It must be in the range [0, 2147483647]
871
+ * Seed is supported for stratify column.
872
+ Types: int
873
+
874
+ stratify_column:
875
+ Optional Argument.
876
+ Specifies column name that contains the labels indicating
877
+ which data needs to be stratified for TrainTest split.
878
+ Notes:
879
+ * seed is supported for stratify column.
880
+ Types: str
881
+
882
+ sample_id_column:
883
+ Optional Argument.
884
+ Specifies the input data column name that has the
885
+ unique identifier for each row in the input.
886
+ Note:
887
+ * Mandatory when "sample_seed" argument is present.
888
+ Types: str
889
+
890
+ RETURNS:
891
+ None
892
+
893
+ RAISES:
894
+ None
895
+
896
+ EXAMPLES:
897
+ >>> _labeled_df = {'DF_0': DF1, 'DF_1': DF2}
898
+ >>> # Sample the labeled DataFrame.
899
+ >>> self.__perform_train_test_sampling(_labeled_df)
900
+ {'DF_0': [{'data':DF1_Train}, {'newdata':DF1_Test}],
901
+ 'DF_1': [{'data':DF2_Train}, {'newdata':DF2_Test}]}
902
+ """
903
+ # Validate the range of "frac" argument value.
904
+ _Validators._validate_argument_range(arg=frac, arg_name='frac',
905
+ lbound=0.0, ubound=1.0)
906
+
907
+ self.__sampled_df_mapper = {}
908
+ for _data_id in data:
909
+ # Setup train, test input data argument name according to function.
910
+ # Apart from "KNN" function all other SQLE, and VAL function takes "data"
911
+ # as training input data argument.
912
+ train_data_arg = self._get_model_trainer_train_data_arg()
913
+ # Test input data argument name varies for all function. So retrieve
914
+ # the stored information.
915
+ test_data_arg = self._TRAINABLE_FUNCS_DATA_MAPPER[self.__func_name]
916
+
917
+ # Perform sampling based on given "frac" value.
918
+ # Consider the "frac" value as train percentage and the remaining
919
+ # as test percentage for train-test-split.
920
+ train_test_sample = data[_data_id].sample(frac=[frac, round(1 - frac, 2)],
921
+ stratify_column=stratify_column,
922
+ id_column=sample_id_column,
923
+ seed=sample_seed)
924
+ # Represent the sample. Otherwise, split consistency is lost.
925
+ train_test_sample.materialize()
926
+
927
+ _sample_id = "sampleid"
928
+ _split_value = [1, 2]
929
+
930
+ # Create train DataFrame.
931
+ _train_data = train_test_sample[\
932
+ train_test_sample[_sample_id] == _split_value[0]].drop(\
933
+ _sample_id, axis = 1)
934
+
935
+ # Create test DataFrame.
936
+ _test_data = train_test_sample[\
937
+ train_test_sample[_sample_id] == _split_value[1]].drop(\
938
+ _sample_id, axis = 1)
939
+
940
+ # Represent train and test dataset.
941
+ _train_data.materialize()
942
+ _test_data.materialize()
943
+
944
+ # Update train and test dataset using data id with train and test
945
+ # arguments. Unique Data-structure to store train and test sampled
946
+ # data for model trainer functions.
947
+ self.__sampled_df_mapper[_data_id] = [{train_data_arg:_train_data},
948
+ {test_data_arg:_test_data}]
949
+
950
+ def __update_model_parameters(self):
951
+ """
952
+ DESCRIPTION:
953
+ Internal function to update the parameter grid with multiple
954
+ dataframe using unique data identifiers. This function perform
955
+ cartesian products on parameter grid and data identifiers.
956
+ Hence, Hyperparameter tuning is performed on all DataFrame.
957
+
958
+ Notes:
959
+ * This function is only applicable for model trainer functions
960
+ (supervised, and unsupervised models).
961
+ * '_sampled_df_mapper' variable must contain labeled data before
962
+ updating parameter grid. Since, unique data identifier is added
963
+ to all parameters present in parameter grid.
964
+
965
+ PARAMETERS:
966
+ None
967
+
968
+ RETURNS:
969
+ None
970
+
971
+ RAISES:
972
+ None
973
+
974
+ EXAMPLES:
975
+ >>> _labeled_df = {'DF_0': DF1, 'DF_1': DF2}
976
+ >>> # Sample the labeled DataFrame.
977
+ >>> self.__perform_train_test_sampling(_labeled_df)
978
+ {'DF_0': [{'data':DF1_Train}, {'newdata':DF1_Test}],
979
+ 'DF_1': [{'data':DF2_Train}, {'newdata':DF2_Test}]}
980
+ >>> self.__update_model_parameters()
981
+ [
982
+ {'param': {'input_columns': ['age', 'survived', 'pclass'],
983
+ 'response_column': 'fare', 'max_depth': 10, 'lambda1': 1000.0,
984
+ 'model_type': 'regression', 'seed': -1, 'shrinkage_factor': 0.1,
985
+ 'iter_num': 2},
986
+ 'data_id': 'DF_0'},
987
+ {'param': {'input_columns': ['age', 'survived', 'pclass'],
988
+ 'response_column': 'fare', 'max_depth': 10, 'lambda1': 1000.0,
989
+ 'model_type': 'regression', 'seed': -1, 'shrinkage_factor': 0.1,
990
+ 'iter_num': 50},
991
+ 'data_id': 'DF_1'}
992
+ ]
993
+ """
994
+ # Get data identifiers.
995
+ _model_ids = self.__sampled_df_mapper.keys()
996
+ # Update '_parameter_grid' with data identifiers by performing
997
+ # cartesian product.
998
+ self._parameter_grid = [{"param":param[0] , self.__DATA_ID:param[1]} for \
999
+ param in product(self._parameter_grid, _model_ids)]
1000
+
1001
+ def __validate_model_trainer_input_data_argument(self, data, is_optional_arg=True):
1002
+ """
1003
+ DESCRIPTION:
1004
+ Internal function to validate input data of model trainer function.
1005
+ This function validates single DataFrame, multiple DataFrame, and
1006
+ multiple DataFrame with user-defined data labels.
1007
+ Notes:
1008
+ * This function is only applicable for model trainer functions
1009
+ (supervised, and unsupervised models).
1010
+
1011
+ PARAMETERS:
1012
+ data:
1013
+ Required Argument.
1014
+ Specifies the input teradataml DataFrame for model trainer function.
1015
+ Notes:
1016
+ * "data" is a required argument for model trainer functions.
1017
+ * "data" is ignored for non-model trainer functions.
1018
+ * "data" can be contain single DataFrame or multiple DataFrame.
1019
+ * Multiple DataFrame must be specified using tuple or Dictionary
1020
+ as follow.
1021
+ * Tuples:
1022
+ gs.fit(data=(df1, df2), **eval_params)
1023
+
1024
+ * Dictionary:
1025
+ gs.fit(data={"data-1":df1, "data-2":df2}, **eval_params)
1026
+ Types: teradataml DataFrame, dictionary, tuples
1027
+
1028
+ is_optional_arg:
1029
+ Optional Argument.
1030
+ Specifies whether passed data argument value is a optional
1031
+ argument or not.
1032
+ Default Value: True
1033
+ Types: bool
1034
+
1035
+ RETURNS:
1036
+ None
1037
+
1038
+ RAISES:
1039
+ TeradataMlException, TypeError, ValueError
1040
+
1041
+ EXAMPLES:
1042
+ >>> self.__validate_model_trainer_input_data_argument(data,
1043
+ _is_optional_arg)
1044
+
1045
+ """
1046
+ # Validate "data" for model trainer functions.
1047
+ arg_info_matrix = []
1048
+ if isinstance(data, tuple):
1049
+ # Validate all DataFrames present in tuples.
1050
+ for _data in data:
1051
+ arg_info_matrix.append(["data", _data, is_optional_arg, (DataFrame)])
1052
+ elif isinstance(data, dict):
1053
+ # Validate all DataFrames present in dictionary format.
1054
+ for _data_id in data:
1055
+ arg_info_matrix.append(["data", data[_data_id], is_optional_arg, (DataFrame)])
1056
+ else:
1057
+ # Validate DataFrames.
1058
+ arg_info_matrix.append(["data", data, is_optional_arg, (DataFrame)])
1059
+ _Validators._validate_function_arguments(arg_info_matrix)
1060
+
1061
+ def _regression_metrics(self, y_true, y_pred):
1062
+ from teradataml import td_sklearn as skl
1063
+
1064
+ ME = skl.max_error(y_true=y_true, y_pred=y_pred)
1065
+
1066
+ MAE = skl.mean_absolute_error(y_true=y_true, y_pred=y_pred)
1067
+
1068
+ MSE = skl.mean_squared_error(y_true=y_true, y_pred=y_pred, squared=False)
1069
+
1070
+ try:
1071
+ MSLE = skl.mean_squared_log_error(y_true=y_true, y_pred=y_pred)
1072
+ except:
1073
+ MSLE = "NA"
1074
+
1075
+ MAPE = skl.mean_absolute_percentage_error(y_true=y_true, y_pred=y_pred)
1076
+
1077
+ R2 = skl.r2_score(y_true=y_true, y_pred=y_pred)
1078
+
1079
+ EV = skl.explained_variance_score(y_true=y_true, y_pred=y_pred)
1080
+
1081
+ MAD = skl.median_absolute_error(y_true=y_true, y_pred=y_pred)
1082
+
1083
+ #TODO: Support for MPD, MGD, MTD will be added in next phase.
1084
+ # Support for RMSE, RMSLE will be added after OpenSourceML scikit-learn version
1085
+ # update as it requires higher version(>1.1.3)
1086
+ """MPD = skl.mean_poisson_deviance(y_true, y_pred)
1087
+ MGD = skl.mean_gamma_deviance(y_true, y_pred)
1088
+ MTD = skl.mean_tweedie_deviance(y_true, y_pred)"""
1089
+
1090
+ keys = ["MAE", "MSE", "MSLE", "MAPE", "R2", "EV", "ME", "MAD"]
1091
+ values = [MAE, MSE, MSLE, MAPE, R2, EV, ME, MAD]
1092
+ return dict(zip(keys, values))
1093
+
1094
+ def _classification_metrics(self, y_true, y_pred):
1095
+ from teradataml import td_sklearn as skl
1096
+
1097
+ # Basic classification metrics
1098
+ accuracy = skl.accuracy_score(y_true=y_true, y_pred=y_pred)
1099
+
1100
+ # Precision, Recall, F1 (micro, macro, weighted averages)
1101
+ micro_precision = skl.precision_score(y_true=y_true, y_pred=y_pred, average='micro')
1102
+ micro_recall = skl.recall_score(y_true=y_true, y_pred=y_pred, average='micro')
1103
+ micro_f1 = skl.f1_score(y_true=y_true, y_pred=y_pred, average='micro')
1104
+
1105
+ macro_precision = skl.precision_score(y_true=y_true, y_pred=y_pred, average='macro')
1106
+ macro_recall = skl.recall_score(y_true=y_true, y_pred=y_pred, average='macro')
1107
+ macro_f1 = skl.f1_score(y_true=y_true, y_pred=y_pred, average='macro')
1108
+
1109
+ weighted_precision = skl.precision_score(y_true=y_true, y_pred=y_pred, average='weighted')
1110
+ weighted_recall = skl.recall_score(y_true=y_true, y_pred=y_pred, average='weighted')
1111
+ weighted_f1 = skl.f1_score(y_true=y_true, y_pred=y_pred, average='weighted')
1112
+
1113
+ keys = [
1114
+ "ACCURACY", "MICRO-PRECISION", "MICRO-RECALL", "MICRO-F1",
1115
+ "MACRO-PRECISION", "MACRO-RECALL", "MACRO-F1",
1116
+ "WEIGHTED-PRECISION", "WEIGHTED-RECALL", "WEIGHTED-F1"
1117
+ ]
1118
+ values = [
1119
+ accuracy, micro_precision, micro_recall, micro_f1,
1120
+ macro_precision, macro_recall, macro_f1,
1121
+ weighted_precision, weighted_recall, weighted_f1
1122
+ ]
1123
+ return dict(zip(keys, values))
1124
+
1125
+ def fit(self,
1126
+ data=None,
1127
+ evaluation_metric=None,
1128
+ early_stop=None,
1129
+ frac=0.8,
1130
+ run_parallel=True,
1131
+ wait=True,
1132
+ verbose=0,
1133
+ stratify_column=None,
1134
+ sample_id_column=None,
1135
+ sample_seed=None,
1136
+ max_time=None,
1137
+ **kwargs):
1138
+ """
1139
+ DESCRIPTION:
1140
+ Function to run the teradataml analytic function for all sets of
1141
+ hyperparameters. Sets of hyperparameters chosen for execution
1142
+ from the parameter grid were the parameter grid is populated
1143
+ based on search algorithm.
1144
+ Notes:
1145
+ * In the Model trainer function, the best parameters are
1146
+ selected based on training results.
1147
+ * In the Non model trainer function, First execution parameter
1148
+ set is selected as the best parameters.
1149
+
1150
+ PARAMETERS:
1151
+ data:
1152
+ Optional Argument.
1153
+ Specifies the input teradataml DataFrame for model trainer function.
1154
+ Notes:
1155
+ * DataFrame need not to be passed in fit() methods, when "data" is
1156
+ passed as a model hyperparameters ("params").
1157
+ * "data" is a required argument for model trainer functions.
1158
+ * "data" is ignored for non-model trainer functions.
1159
+ * "data" can be contain single DataFrame or multiple DataFrame.
1160
+ * One can pass multiple dataframes to "data". Hyperparameter
1161
+ tuning is performed on all the dataframes for every model
1162
+ parameter.
1163
+ * "data" can be either a dictionary OR a tuple OR a dataframe.
1164
+ * If it is a dictionary then Key represents the label for
1165
+ dataframe and Value represents the dataframe.
1166
+ * If it is a tuple then teradataml converts it to dictionary
1167
+ by generating the labels internally.
1168
+ * If it is a dataframe then teradataml label it as "DF_0".
1169
+ Types: teradataml DataFrame, dictionary, tuples
1170
+
1171
+ evaluation_metric:
1172
+ Optional Argument.
1173
+ Specifies the evaluation metrics to considered for model
1174
+ evaluation.
1175
+ Notes:
1176
+ * evaluation_metric applicable for model trainer functions.
1177
+ * Best model is not selected when evaluation returns
1178
+ non-finite values.
1179
+ * MPD, MGD, RMSE, RMSLE are not supported for OpenSourceML models.
1180
+ Permitted Values:
1181
+ * Classification: Accuracy, Micro-Precision, Micro-Recall,
1182
+ Micro-F1, Macro-Precision, Macro-Recall,
1183
+ Macro-F1, Weighted-Precision,
1184
+ Weighted-Recall,
1185
+ Weighted-F1.
1186
+ * Regression: MAE, MSE, MSLE, MAPE, MPE, RMSE, RMSLE, ME,
1187
+ R2, EV, MPD, MGD
1188
+ * Clustering: SILHOUETTE
1189
+ Default Value:
1190
+ * Classification: Accuracy
1191
+ * Regression: MAE
1192
+ * Clustering: SILHOUETTE
1193
+ Types: str
1194
+
1195
+ early_stop:
1196
+ Optional Argument.
1197
+ Specifies the early stop mechanism value for model trainer
1198
+ functions. Hyperparameter tuning ends model training when
1199
+ the training model evaluation metric attains "early_stop" value.
1200
+ Note:
1201
+ * Early stopping supports only when evaluation returns
1202
+ finite value.
1203
+ Types: int or float
1204
+
1205
+ frac:
1206
+ Optional Argument.
1207
+ Specifies the split percentage of rows to be sampled for training
1208
+ and testing dataset. "frac" argument value must range between (0, 1).
1209
+ Notes:
1210
+ * This "frac" argument is not supported for non-model trainer
1211
+ function.
1212
+ * The "frac" value is considered as train split percentage and
1213
+ The remaining percentage is taken into account for test splitting.
1214
+ Default Value: 0.8
1215
+ Types: float
1216
+
1217
+ run_parallel:
1218
+ Optional Argument.
1219
+ Specifies the parallel execution functionality of hyperparameter
1220
+ tuning. When "run_parallel" set to true, model functions are
1221
+ executed concurrently. Otherwise, model functions are executed
1222
+ sequentially.
1223
+ Note:
1224
+ * Early stopping is not supported when parallel run is
1225
+ enabled.
1226
+ Default Value: True
1227
+ Types: bool
1228
+
1229
+ wait:
1230
+ Optional Argument.
1231
+ Specifies whether to wait for the completion of execution
1232
+ of hyperparameter tuning or not. When set to False, hyperparameter
1233
+ tuning is executed in the background and user can use "is_running()"
1234
+ method to check the status. Otherwise it waits until the execution
1235
+ is complete to return the control back to user.
1236
+ Default Value: True
1237
+ Type: bool
1238
+
1239
+ verbose:
1240
+ Optional Argument.
1241
+ Specifies whether to log the model training information and display
1242
+ the logs. When it is set to 1, progress bar alone logged in the
1243
+ console. When it is set to 2, along with progress bar, execution
1244
+ steps and execution time is logged in the console. When it is set
1245
+ to 0, nothing is logged in the console.
1246
+ Note:
1247
+ * verbose is not significant when "wait" is 'False'.
1248
+ Default Value: 0
1249
+ Type: bool
1250
+
1251
+ sample_seed:
1252
+ Optional Argument.
1253
+ Specifies the seed value that controls the shuffling applied
1254
+ to the data before applying the Train-Test split. Pass an int for
1255
+ reproducible output across multiple function calls.
1256
+ Notes:
1257
+ * When the argument is not specified, different
1258
+ runs of the query generate different outputs.
1259
+ * It must be in the range [0, 2147483647]
1260
+ * Seed is supported for stratify column.
1261
+ Types: int
1262
+
1263
+ stratify_column:
1264
+ Optional Argument.
1265
+ Specifies column name that contains the labels indicating
1266
+ which data needs to be stratified for TrainTest split.
1267
+ Notes:
1268
+ * seed is supported for stratify column.
1269
+ Types: str
1270
+
1271
+ sample_id_column:
1272
+ Optional Argument.
1273
+ Specifies the input data column name that has the
1274
+ unique identifier for each row in the input.
1275
+ Note:
1276
+ * Mandatory when "sample_seed" argument is present.
1277
+ Types: str
1278
+
1279
+ max_time:
1280
+ Optional Argument.
1281
+ Specifies the maximum time for the completion of Hyperparameter tuning execution.
1282
+ Default Value: None
1283
+ Types: int or float
1284
+
1285
+ kwargs:
1286
+ Optional Argument.
1287
+ Specifies the keyword arguments. Accepts additional arguments
1288
+ required for the teradataml analytic function.
1289
+
1290
+ RETURNS:
1291
+ None
1292
+
1293
+ RAISES:
1294
+ TeradataMlException, TypeError, ValueError
1295
+
1296
+ EXAMPLES:
1297
+ >>> # Create an instance of the search algorithm called "optimizer_obj"
1298
+ >>> # by referring "__init__()" method.
1299
+ >>> # Perform fit() operation on the "optimizer_obj".
1300
+
1301
+ >>> eval_params = {"id_column": "id",
1302
+ "accumulate": "MedHouseVal"}
1303
+ >>> # Example 1: Passing single DataFrame for model trainer function.
1304
+ >>> optimizer_obj.fit(data=train_df,
1305
+ evaluation_metric="MAE",
1306
+ early_stop=70.9,
1307
+ **eval_params)
1308
+
1309
+ >>> # Example 2: Passing multiple datasets as tuple of DataFrames for
1310
+ >>> # model trainer function.
1311
+ >>> optimizer_obj.fit(data=(train_df_1, train_df_2),
1312
+ evaluation_metric="MAE",
1313
+ early_stop=70.9,
1314
+ **eval_params)
1315
+
1316
+ >>> # Example 3: Passing multiple datasets as dictionary of DataFrames
1317
+ >>> # for model trainer function.
1318
+ >>> optimizer_obj.fit(data={"Data-1":train_df_1, "Data-2":train_df_2},
1319
+ evaluation_metric="MAE",
1320
+ early_stop=70.9,
1321
+ **eval_params)
1322
+
1323
+ >>> # Example 4: No data argument passed in fit() method for model trainer function.
1324
+ >>> # Note: data argument must be passed while creating HPT object as
1325
+ >>> # model hyperparameters.
1326
+
1327
+ >>> # Define parameter space for model training with "data" argument.
1328
+ >>> params = {"data":(df1, df2),
1329
+ "input_columns":['MedInc', 'HouseAge', 'AveRooms',
1330
+ 'AveBedrms', 'Population', 'AveOccup',
1331
+ 'Latitude', 'Longitude'],
1332
+ "response_column":"MedHouseVal",
1333
+ "model_type":"regression",
1334
+ "batch_size":(11, 50, 75),
1335
+ "iter_max":(100, 301),
1336
+ "intercept":False,
1337
+ "learning_rate":"INVTIME",
1338
+ "nesterov":True,
1339
+ "local_sgd_iterations":1}
1340
+
1341
+ >>> # Create "optimizer_obj" using any search algorithm and perform
1342
+ >>> # fit() method without any "data" argument for model trainer function.
1343
+ >>> optimizer_obj.fit(evaluation_metric="MAE",
1344
+ early_stop=70.9,
1345
+ **eval_params)
1346
+
1347
+ >>> # Example 5: Do not pass data argument in fit() method for
1348
+ >>> # non-model trainer function.
1349
+ >>> # Note: data argument must be passed while creating HPT
1350
+ >>> # object as model hyperparameters.
1351
+ >>> optimizer_obj.fit()
1352
+
1353
+ >>> # Example 6: Passing "verbose" argument value '1' in fit() method to
1354
+ >>> # display model log.
1355
+ >>> optimizer_obj.fit(data=train_df, evaluation_metric="R2",
1356
+ verbose=1, **eval_params)
1357
+ completed: |████████████████████████████████████████████████████████████| 100% - 6/6
1358
+
1359
+ """
1360
+
1361
+ # Set the flag to notify fit method is called.
1362
+ self.__is_fit_called = True
1363
+
1364
+ # Validate "early_stop".
1365
+ arg_info_matrix = []
1366
+ arg_info_matrix.append(["early_stop", early_stop, True, (int, float)])
1367
+ arg_info_matrix.append(["frac", frac, True, (float)])
1368
+ arg_info_matrix.append(["run_parallel", run_parallel, True, (bool)])
1369
+ arg_info_matrix.append(["wait", wait, True, (bool)])
1370
+ arg_info_matrix.append(["evaluation_metric", evaluation_metric, True,
1371
+ (str), True, list(self.__osml_func_comparator)
1372
+ if self.__is_opensource_model
1373
+ else list(self.__func_comparator)])
1374
+ arg_info_matrix.append(["verbose", verbose, True, (int), True, [0,1,2]])
1375
+ arg_info_matrix.append(["max_time", max_time, True, (int, float)])
1376
+
1377
+ _Validators._validate_function_arguments(arg_info_matrix)
1378
+
1379
+ # set timeout value.
1380
+ self.__timeout = max_time
1381
+
1382
+ self._setting_model_trainer_data(data)
1383
+
1384
+ # Set the evaluation metrics.
1385
+ if evaluation_metric is not None:
1386
+ self.__evaluation_metric = evaluation_metric.upper()
1387
+ self.__early_stop = early_stop
1388
+ if self.__is_trainable and self.__is_evaluatable and self.__is_sqle_function:
1389
+
1390
+ # When "evaluation_metric" is 'MPE' then use the spl comparators.
1391
+ if self.__evaluation_metric == "MPE":
1392
+ self._is_best_metrics = self._is_early_stoppable = self._spl_abs_comparator
1393
+
1394
+ if not isinstance(self.__model_trainer_input_data, dict):
1395
+ # Sample all the labeled data for model training and testing.
1396
+ self.__perform_train_test_sampling(self._labeled_data, frac, stratify_column,
1397
+ sample_id_column, sample_seed)
1398
+
1399
+ elif isinstance(self.__model_trainer_input_data, dict):
1400
+ # Sample all the custom labeled data for model training and testing.
1401
+ self.__perform_train_test_sampling(self.__model_trainer_input_data, frac,
1402
+ stratify_column, sample_id_column,
1403
+ sample_seed)
1404
+ # Update model trainer function parameter grid.
1405
+ self.__update_model_parameters()
1406
+
1407
+ self.__eval_params = kwargs if self.__is_evaluatable else None
1408
+
1409
+ elif self.__is_trainable and self.__is_opensource_model:
1410
+
1411
+ if self.__is_clustering_model:
1412
+ self.__sampled_df_mapper = self._add_data_label("data")
1413
+ # Update model trainer function parameter grid.
1414
+ self.__update_model_parameters()
1415
+ elif self.__is_regression_model or self.__is_classification_model:
1416
+ # Open-source regression model: perform train-test split
1417
+
1418
+ if not isinstance(self.__model_trainer_input_data, dict):
1419
+ self.__perform_train_test_sampling(self._labeled_data, frac, stratify_column,
1420
+ sample_id_column, sample_seed)
1421
+ elif isinstance(self.__model_trainer_input_data, dict):
1422
+ self.__perform_train_test_sampling(self.__model_trainer_input_data, frac,
1423
+ stratify_column, sample_id_column,
1424
+ sample_seed)
1425
+ # Set evaluation parameters for supervised models
1426
+ self.__eval_params = kwargs if self.__is_evaluatable else None
1427
+
1428
+ self.__update_model_parameters()
1429
+
1430
+ elif self.__is_trainable and not self.__is_evaluatable:
1431
+ # This condition identifies unsupervised model trainer function.
1432
+ # Let's process training data.
1433
+ # Note: All unsupervised model training data argument named as 'data'.
1434
+ # Label the data with model training argument name.
1435
+ self.__sampled_df_mapper = self._add_data_label("data")
1436
+ # Update model trainer function parameter grid.
1437
+ self.__update_model_parameters()
1438
+ # Initialize logging.
1439
+ if verbose > 0:
1440
+ self.__progress_bar = _ProgressBar(jobs=len(self._parameter_grid), verbose=verbose)
1441
+
1442
+ # With VT option Parallel execution won't be possible, as it opens multiple connections.
1443
+ if not run_parallel or configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE:
1444
+ # Setting start time of Sequential execution.
1445
+
1446
+ self.__start_time = time.time() if self.__timeout is not None else None
1447
+ # TODO: Factorize the code once parallel execution part is completed in ELE-6154 JIRA.
1448
+ # Execute all parameters from populated parameter grid for both trainable
1449
+ # and non trainable function.
1450
+ for iter, param in enumerate(self._parameter_grid):
1451
+ self._execute_fit(model_param=param, iter=iter, **kwargs)
1452
+
1453
+ # Condition to check early stop feature applicable for model
1454
+ # trainer function.
1455
+ if self.__early_stop is not None and (self.__is_evaluatable or self.__is_clustering_model):
1456
+ if self.__is_finite and self._is_early_stoppable():
1457
+ # Terminate HPT execution when the trained model attains the
1458
+ # given "early_stop" value.
1459
+ break
1460
+ elif not self.__is_finite:
1461
+ # Raise error because non-finite values cannot be compared
1462
+ # with "__early_stop" value effectively.
1463
+ # Reset the best models and other properties before raising error.
1464
+ self.__default_model = self.__best_model = self.__best_score_ = \
1465
+ self.__best_model_id = self.__best_params_ = None
1466
+ err = Messages.get_message(MessageCodes.EXECUTION_FAILED,
1467
+ "execute 'fit()'","Early stop feature is not applicable"\
1468
+ " when '{metric}' metric results inconsistent value.".format(
1469
+ metric=self.__evaluation_metric))
1470
+ raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
1471
+ if self.__timeout is not None and self._is_time_stoppable():
1472
+ # Terminate HPT execution when the execution time exceeds the
1473
+ # given time limit.
1474
+ break
1475
+
1476
+ else:
1477
+ # TODO: Added support for early_stop feature along with concurrency in ELE-6154 JIRA.
1478
+ # Functions are executed concurrent.
1479
+ # Prepare the parameter grid for concurrent execution.
1480
+ async_exec_params = []
1481
+ for iter, param in enumerate(self._parameter_grid):
1482
+ _temp_params = {}
1483
+ _temp_params["iter"] = iter
1484
+ _temp_params["model_param"] = param
1485
+ _temp_params.update(kwargs)
1486
+ async_exec_params.append(_temp_params)
1487
+
1488
+ # Initialize the stopping event
1489
+ self.__parallel_stop_event = threading.Event()
1490
+ # let's initialize "_AsyncDBExecutor".
1491
+ self._async_executor = _AsyncDBExecutor(wait=wait)
1492
+ # Setting start time of Parallel execution.
1493
+ self.__start_time = time.time() if self.__timeout is not None else None
1494
+ # Trigger parallel thread execution.
1495
+ self._async_executor.submit(self._execute_fit, *async_exec_params)
1496
+
1497
+ if len(self.__model_err_records) > 0 and not kwargs.get('suppress_refer_msg', False):
1498
+ print('\nAn error occurred during Model Training.'\
1499
+ ' Refer to get_error_log() for more details.')
1500
+
1501
+
1502
+ def __model_trainer_routine(self, model_param, iter, **kwargs):
1503
+ """
1504
+ DESCRIPTION:
1505
+ Internal function to perform fit, predict and evaluate operations
1506
+ for model trainer functions. This model trainer routine supports
1507
+ for teradata analytic functions supported by merge model
1508
+ feature.
1509
+
1510
+ PARAMETERS:
1511
+ model_param:
1512
+ Required Argument.
1513
+ Specifies the model trainer arguments used for model training.
1514
+ Notes:
1515
+ * "model_param" contains both model training parameters
1516
+ and sampled data id.
1517
+ * Using 'param' key model training parameters are retrieved
1518
+ from "model_param".
1519
+ * Using 'data_id' key sampled data identifier is retrieved from
1520
+ "model_param".
1521
+ Types: dict
1522
+
1523
+ iter:
1524
+ Required Argument.
1525
+ Specifies the iteration count of HPT execution for teradataml
1526
+ analytic function.
1527
+ Types: int
1528
+
1529
+ kwargs:
1530
+ Required Argument.
1531
+ Specifies the keyword arguments used for model evaluation.
1532
+ Accepts additional required arguments for the model trainer
1533
+ function evaluation.
1534
+
1535
+ RETURNS:
1536
+ None
1537
+
1538
+ RAISES:
1539
+ None
1540
+
1541
+ EXAMPLES:
1542
+ >>> self.__model_trainer_routine(param=param, iter=iter, **kwargs)
1543
+ """
1544
+ # Define model name used for model metadata.
1545
+
1546
+ model_name = self._generate_model_name(iter)
1547
+ # Get the unique data identifier present in "model_param".
1548
+ _data_id = model_param[self.__DATA_ID]
1549
+ # 'param' variable holds model training parameters and train dataframe.
1550
+ # Get the model training parameters.
1551
+
1552
+ if self.__is_opensource_model:
1553
+ param_outer = model_param.get("param", {})
1554
+ param = param_outer.get("param", param_outer)
1555
+ data_input = param.pop("data", None)
1556
+ param = {k: v for k, v in param.items() if k != "data"}
1557
+ else:
1558
+ param = model_param["param"]
1559
+ data_input = None
1560
+
1561
+ # Check the stop_event set or not
1562
+ if self.__parallel_stop_event is not None and self.__parallel_stop_event.is_set():
1563
+ # Update the model metadata for Skip execution.
1564
+ self.__update_model_metadata(model_name, param, "SKIP", 0, 0, 0, _data_id)
1565
+ return
1566
+
1567
+ # Retrieve the train and test data using data identifier.
1568
+ if self.__is_opensource_model:
1569
+
1570
+ if self.__is_clustering_model:
1571
+ _train_data = self.__sampled_df_mapper[_data_id]
1572
+ _test_data = {} # No label needed
1573
+ elif self.__is_regression_model or self.__is_classification_model:
1574
+ _train_data, _test_data = self.__sampled_df_mapper[_data_id]
1575
+ kwargs.update(_test_data)
1576
+ else:
1577
+ _train_data, _test_data = self.__sampled_df_mapper[_data_id]
1578
+ # Update model training argument with train DataFrame.
1579
+ param.update(_train_data)
1580
+ # Update the test DataFrame for model evaluation.
1581
+ kwargs.update(_test_data)
1582
+
1583
+ try:
1584
+ # Record starting time of model training.
1585
+ start_time = time.perf_counter()
1586
+ if self.__is_val_function:
1587
+ # VAL uses special framework. So, Lets create new instance
1588
+ # using getattr method.
1589
+ self.__func = valib.__getattr__(self.__func_name)
1590
+ # Train the model.
1591
+ if self.__is_opensource_model:
1592
+ from teradataml import td_sklearn as skl
1593
+ func_class = getattr(skl, self.__func_name) # e.g., skl.KMeans
1594
+ if self.__is_regression_model or self.__is_classification_model:
1595
+ # Extract and remove only for regression models
1596
+ self.__input_columns = param.pop("input_columns", None)
1597
+ self.__response_column = param.pop("response_column", None)
1598
+
1599
+ func_obj = func_class(**param) # Safely create model instance
1600
+ else:
1601
+ func_obj = self.__func(**param)
1602
+ end_time = time.perf_counter()
1603
+ training_time = round((end_time - start_time), 3)
1604
+ # Store the trained object.
1605
+ self.__trained_models[model_name] = func_obj
1606
+
1607
+ if self.__is_opensource_model and self.__is_clustering_model:
1608
+ start_time_cluster = time.perf_counter()
1609
+ from teradataml import td_sklearn as skl
1610
+ feature_cols = [col for col in _train_data["data"].columns]
1611
+ func_obj.fit(data=_train_data["data"], feature_columns=feature_cols)
1612
+ pred_col = self._get_predict_column()
1613
+ result = func_obj.predict(data=_train_data["data"], feature_columns=feature_cols)
1614
+ result.materialize()
1615
+
1616
+ silhouette = skl.silhouette_score(
1617
+ X=result.select(feature_cols),
1618
+ labels=result.select([pred_col])
1619
+ )
1620
+
1621
+ calinski = skl.calinski_harabasz_score(
1622
+ X=result.select(feature_cols),
1623
+ labels=result.select([pred_col])
1624
+ )
1625
+
1626
+ davies = skl.davies_bouldin_score(
1627
+ X=result.select(feature_cols),
1628
+ labels=result.select([pred_col])
1629
+ )
1630
+
1631
+ columns = ["SILHOUETTE", "CALINSKI", "DAVIES"]
1632
+ eval_values = [silhouette, calinski, davies]
1633
+ eval_key_values = dict(zip(columns, eval_values))
1634
+
1635
+ end_time_cluster = time.perf_counter()
1636
+ training_time_cluster = round((end_time_cluster - start_time_cluster), 3)
1637
+
1638
+ if self.__evaluation_metric is None:
1639
+ self.__evaluation_metric = "SILHOUETTE"
1640
+
1641
+ self.__update_model_metadata(model_name, param, "PASS", training_time_cluster,
1642
+ end_time_cluster, start_time_cluster, _data_id, eval_key_values)
1643
+ elif self.__is_opensource_model and (self.__is_regression_model or self.__is_classification_model):
1644
+ start_time_lin = time.perf_counter()
1645
+ train_df = _train_data["data"]
1646
+ y = train_df.select([self.__response_column])
1647
+ X = train_df.drop(columns=[self.__response_column], axis=1)
1648
+
1649
+ func_obj.fit(X,y)
1650
+ pred_col = self._get_predict_column()
1651
+
1652
+ output = func_obj.predict(X,y)
1653
+
1654
+ y_true = output.select([self.__response_column])
1655
+ y_pred = output.select([pred_col])
1656
+
1657
+ if self.__is_regression_model:
1658
+ eval_key_values = self._regression_metrics(y_true, y_pred)
1659
+ if self.__evaluation_metric is None:
1660
+ self.__evaluation_metric = "MAE"
1661
+ elif self.__is_classification_model:
1662
+ eval_key_values = self._classification_metrics(y_true, y_pred)
1663
+ if self.__evaluation_metric is None:
1664
+ self.__evaluation_metric = "ACCURACY"
1665
+
1666
+ end_time_lin = time.perf_counter()
1667
+ training_time_lin = round((end_time_lin - start_time_lin), 3)
1668
+
1669
+ self.__update_model_metadata(model_name, param, "PASS", training_time_lin,
1670
+ end_time_lin, start_time_lin, _data_id, eval_key_values)
1671
+ else:
1672
+ # Evaluate the trained model.
1673
+ evaluations = func_obj.evaluate(**kwargs)
1674
+ # Extract evaluations report in dictionary format.
1675
+ if "RegressionEvaluator" in type(evaluations).__name__:
1676
+ # RegressionEvaluator results are stored under "result" attribute.
1677
+ # "result" dataframe column names are metrics and corresponding
1678
+ # rows are evaluation values.
1679
+ columns = evaluations.result.keys()
1680
+ eval_values = evaluations.result.get_values()[0]
1681
+
1682
+ # Default evaluation metric is set to "MAE" for Regression models.
1683
+ if self.__evaluation_metric is None:
1684
+ self.__evaluation_metric = "MAE"
1685
+
1686
+ else:
1687
+ # ClassificationEvaluator results are stored under "output_data"
1688
+ # attribute. "output_data" dataframe 'column 1' contains metrics
1689
+ # and 'column 2' holds corresponding evaluation values.
1690
+ eval_report = evaluations.output_data.get_values().transpose()
1691
+ columns = eval_report[1].astype('str')
1692
+ columns = [column_name.upper() for column_name in columns]
1693
+ eval_values = eval_report[2]
1694
+
1695
+ # Default evaluation metric is set to "ACCURACY" for
1696
+ # classification models.
1697
+ if self.__evaluation_metric is None:
1698
+ self.__evaluation_metric = "ACCURACY"
1699
+
1700
+ # Combine columns and eval_values into a dictionary
1701
+ eval_key_values = dict(zip(columns, eval_values))
1702
+ # Update the model metadata for successful model training.
1703
+ self.__update_model_metadata(model_name, param, "PASS",
1704
+ training_time, end_time, start_time,
1705
+ _data_id, eval_key_values)
1706
+
1707
+
1708
+ # Check whether self.__parallel_stop_event is None or not
1709
+ if self.__parallel_stop_event is not None:
1710
+ # SET the self.__parallel_stop_event
1711
+ # When trained model evaluation metric value exceeds self.__early_stop
1712
+ # or When execution time exceeds self.__timeout
1713
+ if (self.__early_stop is not None and self._is_early_stoppable())\
1714
+ or (self.__timeout is not None and self._is_time_stoppable()):
1715
+ self.__parallel_stop_event.set()
1716
+
1717
+ except Exception as _err_msg:
1718
+ # Record error message with corresponding "model_name".
1719
+ self.__model_err_records[model_name] = str(_err_msg)
1720
+ # Compute the failed execution time for failed training.
1721
+ end_time = time.perf_counter()
1722
+ training_time = round((end_time - start_time), 3)
1723
+ # Update the model metadata for failed execution.
1724
+ self.__update_model_metadata(model_name, param, "FAIL", training_time,
1725
+ end_time, start_time, _data_id)
1726
+ pass
1727
+
1728
+ def __non_model_trainer_routine(self, model_param, iter, **kwargs):
1729
+ """
1730
+ DESCRIPTION:
1731
+ Internal function to perform fit operations for non-model
1732
+ trainer functions. This is non-model trainer routine supports
1733
+ for teradata analytic functions.
1734
+ Note:
1735
+ * non-evaluatable model trainer function trained in this routine.
1736
+
1737
+ PARAMETERS:
1738
+ model_param:
1739
+ Required Argument.
1740
+ Specifies the model trainer arguments used for model execution.
1741
+ Notes:
1742
+ * "model_param" contains both model training parameters
1743
+ and data id for non-evaluatable model trainer
1744
+ functions.
1745
+ * Using 'param' key model training parameters are retrieved
1746
+ from "model_param" for non-evaluatable functions.
1747
+ * Using 'data_id' key data identifier is retrieved from
1748
+ "model_param" for non-evaluatable functions.
1749
+ * No pre-processing required in "model_param" for non-model
1750
+ trainer functions.
1751
+ * Instead of data identifier DataFrame is present for
1752
+ non-model trainer functions.
1753
+ Types: dict
1754
+
1755
+ iter:
1756
+ Required Argument.
1757
+ Specifies the iteration count of HPT execution for teradataml
1758
+ analytic function.
1759
+ Types: int
1760
+
1761
+ kwargs:
1762
+ Optional Argument.
1763
+ Specifies the keyword arguments. Accepts additional arguments
1764
+ required for the teradataml analytic function.
1765
+
1766
+ RETURNS:
1767
+ None
1768
+
1769
+ RAISES:
1770
+ None
1771
+
1772
+ EXAMPLES:
1773
+ >>> self.__non_model_trainer_routine(param=param, iter=iter, **kwargs)
1774
+ """
1775
+ # Define model name used for model metadata.
1776
+ model_name = self._generate_model_name(iter)
1777
+
1778
+ # 'param' variable holds model training parameters and train dataframe.
1779
+ param = None
1780
+ _data_id = None
1781
+ # Update model training argument with train dataframe for unsupervised models.
1782
+ if self.__is_trainable and not self.__is_evaluatable:
1783
+ # Get the model training data id.
1784
+ _data_id = model_param[self.__DATA_ID]
1785
+ # Retrieve train data using data id.
1786
+ _train_data = self.__sampled_df_mapper[_data_id]
1787
+ # Get the model training params.
1788
+ param = model_param["param"]
1789
+ # Update the params with training data.
1790
+ param.update(_train_data)
1791
+ else:
1792
+ # Initialize param for non-model trainer functions.
1793
+ param = model_param
1794
+ # Check the stop_event set or not
1795
+ if self.__parallel_stop_event is not None and self.__parallel_stop_event.is_set():
1796
+ # Update the model metadata for Skip execution.
1797
+ self.__update_model_metadata(model_name, param, "SKIP", 0, 0, 0, _data_id)
1798
+ return
1799
+ try:
1800
+ # Record starting time of model training.
1801
+ start_time = time.perf_counter()
1802
+ if self.__is_val_function:
1803
+ # VAL uses special framework. So, Lets create new instance
1804
+ # using getattr method.
1805
+ self.__func = valib.__getattr__(self.__func_name)
1806
+
1807
+ # Train the model.
1808
+ func_obj = self.__func(**param)
1809
+
1810
+ # Store the trained object.
1811
+ self.__trained_models[model_name] = func_obj
1812
+
1813
+ # Process training time.
1814
+ end_time = time.perf_counter()
1815
+ training_time = round((end_time - start_time), 3)
1816
+ # Update the model metadata for successful model training.
1817
+
1818
+ self.__update_model_metadata(model_name, param, "PASS", training_time, end_time, start_time, _data_id)
1819
+ except Exception as _err_msg:
1820
+ # Record error message with corresponding "model_name".
1821
+ self.__model_err_records[model_name] = str(_err_msg)
1822
+ # Compute the failed execution time for failed training.
1823
+ end_time = time.perf_counter()
1824
+ training_time = round((end_time - start_time), 3)
1825
+ # Update the model metadata for failed execution.
1826
+ self.__update_model_metadata(model_name, param, "FAIL", training_time, end_time, start_time, _data_id)
1827
+ pass
1828
+
1829
+ if self.__parallel_stop_event is not None:
1830
+ # SET the self.__parallel_stop_event
1831
+ # When execution time exceeds self.__timeout
1832
+ if self.__timeout is not None and self._is_time_stoppable():
1833
+ self.__parallel_stop_event.set()
1834
+
1835
+
1836
+ def __update_model_metadata(self, model_name,
1837
+ param,
1838
+ status,
1839
+ training_time,
1840
+ end_time,
1841
+ start_time,
1842
+ data_id=None,
1843
+ eval_key_values=None):
1844
+ """
1845
+ DESCRIPTION:
1846
+ Internal function to update the model evaluation details, that are
1847
+ used for "models" and "model_stats" properties.
1848
+
1849
+ PARAMETERS:
1850
+ model_name:
1851
+ Required Argument.
1852
+ Specifies the unique model name for the training model.
1853
+ Types: str
1854
+
1855
+ param:
1856
+ Required Argument.
1857
+ Specifies the model trainer function parameters used for
1858
+ model training.
1859
+ Types: dict
1860
+
1861
+ status:
1862
+ Required Argument.
1863
+ Specifies the status of executed teradataml analytic function.
1864
+ Permitted Values:
1865
+ * PASS: Function result present in the vantage.
1866
+ * FAIL: Function execution failed for the chosen parameters.
1867
+ * SKIP: Function execution skipped for the chosen parameters.
1868
+ Types: str
1869
+
1870
+ training_time:
1871
+ Required Argument.
1872
+ Specifies the model training time in seconds for both model trainer
1873
+ function and non-model trainer function.
1874
+ Types: float
1875
+
1876
+ end_time:
1877
+ Optional Argument.
1878
+ Specifies the end time of the model training.
1879
+ Types: float
1880
+
1881
+ start_time:
1882
+ Optional Argument.
1883
+ Specifies the start time of the model training.
1884
+ Types: float
1885
+
1886
+ data_id:
1887
+ Optional Argument.
1888
+ Specifies the unique data identifier used for model training.
1889
+ Note:
1890
+ * "data_id" is supported for model trainer functions.
1891
+ Types: str
1892
+
1893
+ eval_key_values:
1894
+ Optional Argument.
1895
+ Specifies the evaluation key values retrieved from model evaluation
1896
+ phase. This argument is a required argument for model trainer
1897
+ function.
1898
+ Types: dict.
1899
+
1900
+ RETURNS:
1901
+ None
1902
+
1903
+ RAISES:
1904
+ None
1905
+
1906
+ EXAMPLES:
1907
+ >>> optimizer_obj.__update_model_metadata(self,
1908
+ evaluations=evaluation_obj.result,
1909
+ iter=1, params={"columns" :
1910
+ ["age", "nbr_children", "income"],
1911
+ "response_column" : "years_with_bank"},
1912
+ status="Present")
1913
+
1914
+ """
1915
+ # Prepare model metadata.
1916
+ model_metadata = {"MODEL_ID" : model_name,
1917
+ "PARAMETERS" : param,
1918
+ "STATUS" : status}
1919
+ if self.__is_trainable:
1920
+ # Update "data_id" for model trainer functions.
1921
+ model_metadata[self.__DATA_ID.upper()] = data_id
1922
+
1923
+ # Format log message needs to displayed.
1924
+ _msg = "Model_id:{}, Run time:{}s, Start time:{}, End time:{}, Status:{}".format(model_name,
1925
+ training_time,
1926
+ start_time,
1927
+ end_time,
1928
+ status)
1929
+
1930
+ if status == "PASS" and (self.__is_evaluatable or self.__is_clustering_model):
1931
+ # While execution status is 'Fail' then update the evaluation result
1932
+ # with 'None' values.
1933
+ model_scores = eval_key_values
1934
+ model_metadata.update(model_scores)
1935
+ # Add additional model score to the log message.
1936
+ if self.__is_opensource_model and (self.__evaluation_metric is None or self.__evaluation_metric not in model_scores):
1937
+ if "SILHOUETTE" in model_scores:
1938
+ self.__evaluation_metric = "SILHOUETTE"
1939
+ _msg += ",{}:{}".format(self.__evaluation_metric,round(
1940
+ model_scores[self.__evaluation_metric], 3))
1941
+ # Best model updation.
1942
+ # 'self.__is_finite' holds 'True' until any infinite value is seen.
1943
+ self.__is_finite = self.__is_finite and np.isfinite(model_metadata[
1944
+ self.__evaluation_metric])
1945
+
1946
+ # Let's check if evaluation result is finite and model is the
1947
+ # new best model.
1948
+ if np.isfinite(model_metadata[self.__evaluation_metric]) and \
1949
+ (self.__best_score_ is None or \
1950
+ self._is_best_metrics(model_metadata[self.__evaluation_metric])):
1951
+ # Update existing best model.
1952
+ self.__default_model = self.__best_model = \
1953
+ self.__trained_models[model_name]
1954
+ # Update existing best score.
1955
+ self.__best_score_ = model_metadata[self.__evaluation_metric]
1956
+ # Update existing best model ID.
1957
+ self.__best_model_id = model_name
1958
+ # "self.__best_params_" contains best model parameters.
1959
+ self.__best_params_ = param
1960
+ # "__best_data_id" contains bet data identifier used for
1961
+ # training best model.
1962
+ self.__best_data_id = data_id
1963
+
1964
+ if not self.__progress_bar is None and status != 'SKIP':
1965
+ # Update progress bar when logging is required.
1966
+ self.__progress_bar.update(msg=_msg)
1967
+ # Update "__model_eval_records" with the formatted metadata.
1968
+ self.__model_eval_records.append(model_metadata)
1969
+
1970
+
1971
+ def predict(self, **kwargs):
1972
+ """
1973
+ DESCRIPTION:
1974
+ Function uses model training function generated models from SQLE,
1975
+ VAL and UAF features for predictions. Predictions are made using
1976
+ the best trained model. Predict function is not supported for
1977
+ non-model trainer function.
1978
+
1979
+ PARAMETERS:
1980
+ kwargs:
1981
+ Optional Argument.
1982
+ Specifies the keyword arguments. Accepts all merge model
1983
+ predict feature arguments required for the teradataml
1984
+ analytic function predictions.
1985
+
1986
+ RETURNS:
1987
+ Output teradataml DataFrames can be accessed using attribute
1988
+ references, such as HPTObj.<attribute_name>.
1989
+ Output teradataml DataFrame attribute name is:
1990
+ result
1991
+
1992
+ RAISES:
1993
+ TeradataMlException, TypeError, ValueError
1994
+
1995
+ EXAMPLES:
1996
+ >>> # Create an instance of the search algorithm called "optimizer_obj"
1997
+ >>> # by referring "__init__()" method.
1998
+ >>> # Perform "fit()" method on the optimizer_obj to populate model records.
1999
+ >>> # Perform prediction using "optimizer_obj".
2000
+ >>> optimizer_obj.predict(newdata=test_data, **eval_params)
2001
+ id prediction MedHouseVal
2002
+ 0 686 0.202843 1.578
2003
+ 1 2018 0.149868 0.578
2004
+ 2 1754 0.211870 1.651
2005
+ 3 670 0.192414 1.922
2006
+ 4 244 0.247545 1.117
2007
+ """
2008
+
2009
+ # Raise TeradataMLException error when non-model trainer function
2010
+ # identifier is passed.
2011
+ if not self.__is_trainable or not self.__is_predictable:
2012
+ err = Messages.get_message(MessageCodes.EXECUTION_FAILED,
2013
+ "execute 'predict()'","Not applicable for" \
2014
+ " non-model trainer analytic functions.")
2015
+ raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
2016
+
2017
+ if self.__default_model is None:
2018
+ err = Messages.get_message(MessageCodes.EXECUTION_FAILED,
2019
+ "execute 'predict()'",
2020
+ "No model is set as default to set a "\
2021
+ "prediction model use the 'set_model()' function.")
2022
+
2023
+ raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
2024
+
2025
+ test_data = kwargs.get("newdata", None)
2026
+
2027
+ if self.__is_opensource_model and self.__is_clustering_model:
2028
+ if test_data is None:
2029
+ test_data = self.__sampled_df_mapper[self.__best_data_id]["data"]
2030
+ feature_columns = kwargs.get("feature_columns", None)
2031
+
2032
+ # If feature columns not passed, fetch from training data
2033
+ if feature_columns is None:
2034
+ if self.__best_data_id is None:
2035
+ err = Messages.get_message(MessageCodes.EXECUTION_FAILED,
2036
+ "fetch 'feature_columns'",
2037
+ "No training metadata found")
2038
+
2039
+ raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
2040
+ training_df = self.__sampled_df_mapper[self.__best_data_id]["data"]
2041
+ training_columns = training_df.columns
2042
+
2043
+ feature_columns = [col for col in training_columns]
2044
+
2045
+ return self.__default_model.predict(data=test_data, feature_columns=feature_columns)
2046
+ elif self.__is_opensource_model and (self.__is_regression_model or self.__is_classification_model):
2047
+ if test_data is None:
2048
+ test_data = self.__sampled_df_mapper[self.__best_data_id][1]["data"]
2049
+ y_test = test_data.select([self.__response_column])
2050
+ X_test = test_data.drop(columns=[self.__response_column], axis=1)
2051
+
2052
+ return self.__default_model.predict(X_test, y_test)
2053
+ # TODO Enable this method, once Merge model supports VAL, and UAF.
2054
+ return self.__default_model.predict(**kwargs)
2055
+
2056
+
2057
+ def get_input_data(self, data_id):
2058
+ """
2059
+ DESCRIPTION:
2060
+ Function to get the input data used by model trainer functions.
2061
+ Unique identifiers (data_id) is used to get the training data.
2062
+ In case of unlabeled data such as single dataframe or tuple of
2063
+ dataframe, default unique identifiers are assigned. Hence, unlabeled
2064
+ training data is retrieved using default unique identifiers.
2065
+ Notes:
2066
+ * Function only returns input data for model trainer functions.
2067
+ * Train and Test sampled data are returned for supervised
2068
+ model trainer function (evaluatable functions).
2069
+ * Train data is returned for unsupervised-model trainer function
2070
+ (non-evaluatable functions).
2071
+
2072
+ PARAMETERS:
2073
+ data_id:
2074
+ Required Argument.
2075
+ Specifies the unique data identifier used for model training.
2076
+ Types: str
2077
+
2078
+ RETURNS:
2079
+ teradataml DataFrame
2080
+
2081
+ RAISES:
2082
+ ValueError
2083
+
2084
+ EXAMPLES:
2085
+ >>> # Create an instance of the search algorithm called "optimizer_obj"
2086
+ >>> # by referring "__init__()" method.
2087
+ >>> # Perform "fit()" method on the optimizer_obj to populate model records.
2088
+ >>> # Retrieve the training data.
2089
+ >>> optimizer_obj.get_input_data(data_id="DF_1")
2090
+ [{'data': id MedHouseVal MedInc HouseAge AveRooms AveBedrms Population AveOccup Latitude Longitude
2091
+ 0 19789 0.660 -1.154291 -0.668250 0.862203 7.021803 -1.389101 -1.106515 2.367716 -1.710719
2092
+ 1 17768 1.601 -0.447350 -0.162481 -0.431952 -0.156872 2.436223 2.172854 0.755780 -1.016640
2093
+ 2 19722 0.675 -0.076848 1.439120 1.805547 1.944759 -1.186169 0.326739 1.459894 -0.974996
2094
+ 3 18022 3.719 1.029892 0.343287 0.635952 -0.480133 -0.914869 -0.160824 0.711496 -1.067540
2095
+ 4 15749 3.500 -0.182247 1.776299 -0.364226 0.035715 -0.257239 -0.970166 0.941772 -1.294272
2096
+ 5 11246 2.028 -0.294581 -0.583955 -0.265916 -0.270654 0.182266 -0.703494 -0.807444 0.764827
2097
+ 6 16736 3.152 0.943735 1.439120 -0.747066 -1.036053 -1.071138 -0.678411 0.906345 -1.234118
2098
+ 7 12242 0.775 -1.076758 -0.752545 -0.424517 0.460470 0.742228 -0.597809 -0.838443 1.241428
2099
+ 8 14365 2.442 -0.704218 1.017646 -0.428965 -0.367301 -1.014707 -1.333045 -1.294568 1.121121
2100
+ 9 18760 1.283 0.019018 -1.258313 0.754993 0.013994 0.094365 0.222254 2.195008 -1.201728},
2101
+ {'newdata': id MedHouseVal MedInc HouseAge AveRooms AveBedrms Population AveOccup Latitude Longitude
2102
+ 0 16102 2.841 0.206284 1.270530 -0.248620 -0.224210 -0.059733 -0.242386 0.937344 -1.317408
2103
+ 1 15994 3.586 0.306050 1.439120 0.255448 -0.334613 -0.160657 -0.426510 0.937344 -1.303526
2104
+ 2 15391 2.541 0.423107 -1.595492 0.951807 -0.061005 1.955480 0.517572 -1.055434 1.236801
2105
+ 3 18799 0.520 -0.677565 -0.415366 0.548756 1.254406 -0.883398 -0.534060 2.358859 -1.035149
2106
+ 4 19172 1.964 0.247152 -0.162481 0.428766 -0.427459 -0.175849 -0.451380 1.238475 -1.396070
2107
+ 5 18164 3.674 0.295345 -1.258313 -1.078181 0.175885 0.045531 -1.298667 0.760208 -1.099930
2108
+ 6 13312 1.598 0.484475 -1.342608 0.767557 -0.229585 0.113899 0.361520 -0.692306 0.949915
2109
+ 7 12342 1.590 -0.520029 -0.246776 0.973345 1.407755 2.325532 -0.406887 -0.798587 1.445024}]
2110
+
2111
+ """
2112
+ # Validation.
2113
+ arg_info_matrix = []
2114
+ arg_info_matrix.append(["data_id", data_id, False, str,
2115
+ True, list(self.__sampled_df_mapper.keys())])
2116
+
2117
+ # "data_id" argument validation.
2118
+ # "data_id" validates for argument type, and permitted values.
2119
+ _Validators._validate_function_arguments(arg_info_matrix)
2120
+
2121
+ return self.__sampled_df_mapper.get(data_id)
2122
+
2123
+
2124
+ def get_model(self, model_id):
2125
+ """
2126
+ DESCRIPTION:
2127
+ Function to get the model.
2128
+
2129
+ PARAMETERS:
2130
+ model_id:
2131
+ Required Argument.
2132
+ Specifies the unique identifier for model.
2133
+ Notes:
2134
+ * Trained model results returned for model trainer functions.
2135
+ * Executed function results returned for non-model trainer
2136
+ functions.
2137
+ Types: str
2138
+
2139
+ RETURNS:
2140
+ Object of teradataml analytic functions.
2141
+ Note:
2142
+ * Attribute references remains same as that of the function
2143
+ attributes.
2144
+
2145
+ RAISES:
2146
+ TeradataMlException, ValueError
2147
+
2148
+ EXAMPLES:
2149
+ >>> # Create an instance of the search algorithm called "optimizer_obj"
2150
+ >>> # by referring "__init__()" method.
2151
+ >>> # Perform "fit()" method on the optimizer_obj to populate model records.
2152
+ >>> # Retrieve the trained model.
2153
+ >>> optimizer_obj.get_model(model_id="SVM_1")
2154
+ ############ output_data Output ############
2155
+
2156
+ iterNum loss eta bias
2157
+ 0 3 2.265289 0.028868 0.0
2158
+ 1 5 2.254413 0.022361 0.0
2159
+ 2 6 2.249260 0.020412 0.0
2160
+ 3 7 2.244463 0.018898 0.0
2161
+ 4 9 2.235800 0.016667 0.0
2162
+ 5 10 2.231866 0.015811 0.0
2163
+ 6 8 2.239989 0.017678 0.0
2164
+ 7 4 2.259956 0.025000 0.0
2165
+ 8 2 2.271862 0.035355 0.0
2166
+ 9 1 2.280970 0.050000 0.0
2167
+
2168
+ ############ result Output ############
2169
+
2170
+ predictor estimate value
2171
+ attribute
2172
+ -7 Alpha 0.50000 Elasticnet
2173
+ -3 Number of Observations 31.00000 None
2174
+ 5 Population -0.32384 None
2175
+ 0 (Intercept) 0.00000 None
2176
+ -17 OneClass SVM NaN FALSE
2177
+ -16 Kernel NaN LINEAR
2178
+ -1 Loss Function NaN EPSILON_INSENSITIVE
2179
+ 7 Latitude 0.00000 None
2180
+ -9 Learning Rate (Initial) 0.05000 None
2181
+ -14 Epsilon 0.10000 None
2182
+
2183
+ """
2184
+ # Validations
2185
+ arg_info_matrix = []
2186
+ arg_info_matrix.append(["model_id", model_id, False, str,
2187
+ True, list(self.__trained_models.keys())])
2188
+
2189
+ # "model_id" argument validations.
2190
+ # "model_id" validates for argument type, and permitted values.
2191
+ _Validators._validate_function_arguments(arg_info_matrix)
2192
+
2193
+ # Get the trained model object of trained model.
2194
+ model_obj = self.__trained_models.get(model_id)
2195
+ # Raise teradataml exception when HPT "fit" method is not executed.
2196
+ # since "self.__trained_models" does not contain a record for retrieval.
2197
+ if model_obj is None:
2198
+ err = Messages.get_message(MessageCodes.MODEL_NOT_FOUND,
2199
+ model_id, ' or not created')
2200
+ raise TeradataMlException(err, MessageCodes.MODEL_NOT_FOUND)
2201
+
2202
+ return model_obj
2203
+
2204
+
2205
+ def get_error_log(self, model_id):
2206
+ """
2207
+ DESCRIPTION:
2208
+ Function to get the error logs of a failed model training in the fit method.
2209
+
2210
+ PARAMETERS:
2211
+ model_id:
2212
+ Required Argument.
2213
+ Specifies the unique identifier for model.
2214
+ Note:
2215
+ * Only failed model training error log is returned.
2216
+ Types: str
2217
+
2218
+ RETURNS:
2219
+ string
2220
+
2221
+ RAISES:
2222
+ TypeError, ValueError
2223
+
2224
+ EXAMPLES:
2225
+ >>> # Create an instance of the search algorithm called "optimizer_obj"
2226
+ >>> # by referring "__init__()" method.
2227
+ >>> # Perform "fit()" method on the optimizer_obj to populate model records.
2228
+ >>> # Retrieve the error log.
2229
+ >>> optimizer_obj.get_error_log("SVM_2")
2230
+ "[Teradata][teradataml](TDML_2082) Value of 'iter_max' must be greater
2231
+ than or equal to 1 and less than or equal to 10000000."
2232
+
2233
+ """
2234
+ # Validations
2235
+ arg_info_matrix = []
2236
+ arg_info_matrix.append(["model_id", model_id, False, str,
2237
+ True, list(self.__model_err_records.keys())])
2238
+
2239
+ # "model_id" argument validations.
2240
+ # "model_id" validates for argument type, and permitted values.
2241
+ _Validators._validate_function_arguments(arg_info_matrix)
2242
+
2243
+ # Retrieve the raw error message
2244
+ msg = self.__model_err_records.get(model_id)
2245
+
2246
+ # For opensource models, return trimmed message
2247
+ if self.__is_opensource_model:
2248
+ return msg.split("\n", 1)[0].strip()
2249
+
2250
+ # For generic models, return original message
2251
+ return msg
2252
+
2253
+
2254
+ def set_model(self, model_id):
2255
+ """
2256
+ DESCRIPTION:
2257
+ Function to set the model to use for Prediction.
2258
+
2259
+ PARAMETERS:
2260
+ model_id:
2261
+ Required Argument.
2262
+ Specifies the unique identifier for model.
2263
+ Note:
2264
+ * Not significant for non-model trainer functions.
2265
+ Types: str
2266
+
2267
+ RETURNS:
2268
+ None
2269
+
2270
+ RAISES:
2271
+ TeradataMlException, ValueError
2272
+
2273
+ EXAMPLES:
2274
+ >>> # Create an instance of the search algorithm called "optimizer_obj"
2275
+ >>> # by referring "__init__()" method.
2276
+ >>> # Perform "fit()" method on the optimizer_obj to populate model records.
2277
+ >>> # Set the default trained model.
2278
+ >>> optimizer_obj.set_model(model_id="SVM_1")
2279
+ """
2280
+ # Raise TeradataMLException error when non-model trainer function
2281
+ # identifier is passed.
2282
+ if not self.__is_trainable:
2283
+ err = Messages.get_message(MessageCodes.EXECUTION_FAILED,
2284
+ "execute 'set_model()'","Not applicable for" \
2285
+ " non-model trainer analytic functions.")
2286
+ raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
2287
+
2288
+ # Replace the default model with the trained model.
2289
+ self.__default_model = self.get_model(model_id)
2290
+
2291
+
2292
    def evaluate(self, **kwargs):
        """
        DESCRIPTION:
            Function uses trained models from SQLE, VAL and UAF features for
            evaluations. evaluations are made using the default trained model.
            Notes:
                * Evaluation supported for evaluatable model-trainer functions.
                * Best model is set as default model by default.
                * Default model can be changed using "set_model()" method.

        PARAMETERS:
            kwargs:
                Optional Argument.
                Specifies the keyword arguments. Accepts additional arguments
                required for the teradataml analytic function evaluations.
                While "kwargs" is empty then internal sampled test dataset
                and arguments used for evaluation. Otherwise,
                All arguments required with validation data need to be passed
                for evaluation.

        RETURNS:
            Output teradataml DataFrames can be accessed using attribute
            references, such as HPTEvaluateObj.<attribute_name>.
            Output teradataml DataFrame attribute name is:
                result

        RAISES:
            TeradataMlException

        EXAMPLES:
            >>> # Create an instance of the search algorithm called "optimizer_obj"
            >>> # by referring "__init__()" method.
            >>> # Perform "fit()" method on the optimizer_obj to populate model records.
            >>> # Perform evaluation using best model.
            >>> optimizer_obj.evaluate(newdata=test_data, **eval_params)
            ############ result Output ############
                    MAE       MSE  MSLE        MAPE         MPE      RMSE  RMSLE        ME       R2       EV  MPD  MGD
            0  2.616772  8.814968   0.0  101.876866  101.876866  2.969001    0.0  5.342344 -4.14622 -0.14862  NaN  NaN

        """

        # Raise TeradataMLException error when non-model trainer function
        # identifier is passed. Clustering models get a dedicated message
        # since they are trainable but not evaluatable.
        if not self.__is_trainable or not self.__is_evaluatable:
            if not self.__is_clustering_model:
                err = Messages.get_message(MessageCodes.EXECUTION_FAILED,
                                           "execute 'evaluate()'","Not applicable for" \
                                           " non-model trainer analytic functions.")
                raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
            else:
                err = Messages.get_message(MessageCodes.EXECUTION_FAILED,
                                           "execute 'evaluate()'","Not applicable for" \
                                           " clustering model functions.")
                raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)

        # A default model is required; it is normally set by fit() (best
        # model) or explicitly by set_model().
        if self.__default_model is None:
            err = Messages.get_message(MessageCodes.EXECUTION_FAILED,
                                       "execute 'evaluate()'",
                                       "No model is set as default to set a "\
                                       "trained model for evaluation use "\
                                       "the 'set_model()' function.")
            raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
        # Opensource regression/classification models are evaluated locally:
        # predict on the test split, then compute metrics client-side.
        if self.__is_opensource_model and (self.__is_regression_model or self.__is_classification_model):
            test_data = kwargs.get("newdata", None)

            # Fall back to the internally sampled test dataset of the best
            # data id when the caller did not supply "newdata".
            if test_data is None:
                test_data = self.__sampled_df_mapper[self.__best_data_id][1]["data"]

            # Split features and response column.
            y_test = test_data.select([self.__response_column])
            X_test = test_data.drop(columns=[self.__response_column], axis=1)

            pred_col = self._get_predict_column()

            output = self.__default_model.predict(X_test,y_test)

            # Actual vs. predicted columns from the prediction output.
            y_true = output.select([self.__response_column])
            y_pred = output.select([pred_col])

            # Compute the metric set matching the model type; the guard above
            # ensures exactly one of these branches runs.
            if self.__is_regression_model:
                eval_key_values = self._regression_metrics(y_true, y_pred)
            elif self.__is_classification_model:
                eval_key_values = self._classification_metrics(y_true, y_pred)

            # NOTE(review): this branch returns a pandas DataFrame, not the
            # teradataml result object described in RETURNS - confirm intended.
            import pandas as pd
            result_df = pd.DataFrame([eval_key_values])
            return result_df
        else:
            # Server-side evaluation: use the stored eval params when the
            # caller passed nothing, otherwise the caller's kwargs.
            _params = self.__eval_params if len(kwargs) == 0 else kwargs
            # Inject the sampled test data when the function's data argument
            # (e.g. "newdata") was not supplied.
            if self._TRAINABLE_FUNCS_DATA_MAPPER[self.__func_name] not in _params:
                _params.update(self.__sampled_df_mapper[self.__best_data_id][1])
            return self.__default_model.evaluate(**_params)
2383
+
2384
+
2385
+ def __populate_parameter_grid(self):
2386
+ """
2387
+ DESCRIPTION:
2388
+ Internal function to populate parameter grid with all combinations.
2389
+
2390
+ PARAMETERS:
2391
+ None
2392
+
2393
+ RETURNS:
2394
+ List of dictionary
2395
+
2396
+ RAISES:
2397
+ None
2398
+
2399
+ EXAMPLES:
2400
+ >>> self.__populate_parameter_grid()
2401
+
2402
+ """
2403
+ param_pairs = []
2404
+ # Iterate all the parameters to create argument name and value pairs.
2405
+ for arg, arg_value in self.__params.items():
2406
+ temp_params = []
2407
+ if isinstance(arg_value, tuple):
2408
+ # When dictionary value type is tuple then add argument name to
2409
+ # all the values in tuples.
2410
+ for value in arg_value:
2411
+ temp_params.append((arg, value))
2412
+ else:
2413
+ # Add argument name to the value.
2414
+ temp_params.append((arg, arg_value))
2415
+
2416
+ # Append name and value pairs to the "param_pairs".
2417
+ param_pairs.append(temp_params)
2418
+
2419
+ # Return list of dictionary containing all possible combinations.
2420
+ return [dict(param) for param in product(*param_pairs)]
2421
+
2422
+ def _data_mapping(self):
2423
+ """
2424
+ DESCRIPTION:
2425
+ Internal function to create a Cartesian product of data mapped with input columns
2426
+ and parameter grid.
2427
+
2428
+ PARAMETERS:
2429
+ None
2430
+
2431
+ RETURNS:
2432
+ None
2433
+ """
2434
+ # Get the input columns from the params.
2435
+ input_columns = self.__params.pop("input_columns")
2436
+ # Create a list of dictionaries with data_id and input_columns
2437
+ data_mapping_list = []
2438
+ # Iterate over the labeled data and create a list of dictionaries
2439
+ for data_ids, data in self._labeled_data.items():
2440
+ # Check if all input columns are present in the data
2441
+ for input_cols in input_columns:
2442
+ if all(col in data.columns for col in input_cols):
2443
+ data_mapping_list.append({'data_id': data_ids,
2444
+ 'input_columns': input_cols})
2445
+
2446
+ self._parameter_grid = self.__populate_parameter_grid()
2447
+
2448
+ cartesian_product = product(self._parameter_grid, data_mapping_list)
2449
+
2450
+ result_list = []
2451
+
2452
+ # Iterate over the Cartesian product and construct the desired dictionaries
2453
+ for params, data_mapping in cartesian_product:
2454
+ result_dict = {
2455
+ 'param': {**params, 'input_columns': data_mapping['input_columns']},
2456
+ self.__DATA_ID: data_mapping['data_id']
2457
+ }
2458
+ result_list.append(result_dict)
2459
+
2460
+ self._parameter_grid = result_list
2461
+
2462
+
2463
+ def _setting_model_trainer_data(self,
2464
+ data=None):
2465
+ """
2466
+ DESCRIPTION:
2467
+ Internal function to set the model trainer input data for model
2468
+ training.
2469
+
2470
+ PARAMETERS:
2471
+ data:
2472
+ Optional Argument.
2473
+ Specifies the input data used for model training.
2474
+ Note:
2475
+ * "data" argument is a required argument for model trainer
2476
+ function when data argument is not passed with hyperparameters.
2477
+ * When data argument is passed with hyperparameters then
2478
+ "data" argument is optional.
2479
+ Types: teradataml DataFrame
2480
+
2481
+ RETURNS:
2482
+ None
2483
+
2484
+ Example:
2485
+ >>> print(self.__model_trainer_input_data)
2486
+ ( id admitted gpa stats programming masters
2487
+ 0 19 0 0.051643 0.0 0.0 1.0
2488
+ 1 6 1 0.765258 0.5 0.0 1.0
2489
+ 2 15 1 1.000000 0.0 0.0 1.0
2490
+ 3 32 0 0.746479 0.0 0.5 1.0
2491
+ 4 12 1 0.835681 1.0 1.0 0.0
2492
+ 5 40 0 0.976526 1.0 0.5 1.0
2493
+ 6 7 1 0.215962 1.0 1.0 1.0
2494
+ 7 36 0 0.530516 0.0 1.0 0.0
2495
+ 8 28 1 0.967136 0.0 0.0 0.0
2496
+ 9 17 1 0.920188 0.0 0.0 0.0,
2497
+ id admitted gpa stats programming masters
2498
+ 0 4 1 0.765258 0.5 1.0 1.0
2499
+ 1 6 1 0.765258 0.5 0.0 1.0
2500
+ 2 7 1 0.215962 1.0 1.0 1.0
2501
+ 3 8 1 0.812207 0.5 0.0 0.0
2502
+ 4 10 1 0.863850 0.0 0.0 0.0
2503
+ 5 11 1 0.591549 0.0 0.0 0.0
2504
+ 6 9 1 0.915493 0.0 0.0 0.0
2505
+ 7 5 0 0.737089 1.0 1.0 0.0
2506
+ 8 3 1 0.859155 1.0 0.5 0.0
2507
+ 9 2 0 0.887324 0.5 0.5 1.0,
2508
+ id admitted gpa stats programming masters
2509
+ 0 23 1 0.807512 0.0 1.0 1.0
2510
+ 1 25 1 0.981221 0.0 0.0 0.0
2511
+ 2 26 1 0.798122 0.0 0.0 1.0
2512
+ 3 27 0 0.981221 0.0 0.0 1.0
2513
+ 4 29 0 1.000000 1.0 0.5 1.0
2514
+ 5 30 0 0.901408 0.0 1.0 1.0
2515
+ 6 28 1 0.967136 0.0 0.0 0.0
2516
+ 7 24 1 0.000000 0.0 1.0 0.0
2517
+ 8 22 0 0.746479 1.0 0.5 1.0
2518
+ 9 21 1 0.938967 1.0 0.5 0.0)
2519
+
2520
+ >>> print(self._labeled_data)
2521
+ {'DF_0': id admitted gpa stats programming masters
2522
+ 0 26 1 0.798122 0.0 0.0 1.0
2523
+ 1 40 0 0.976526 1.0 0.5 1.0
2524
+ 2 7 1 0.215962 1.0 1.0 1.0
2525
+ 3 19 0 0.051643 0.0 0.0 1.0
2526
+ 4 15 1 1.000000 0.0 0.0 1.0
2527
+ 5 32 0 0.746479 0.0 0.5 1.0
2528
+ 6 38 1 0.366197 0.0 0.5 1.0
2529
+ 7 12 1 0.835681 1.0 1.0 0.0
2530
+ 8 6 1 0.765258 0.5 0.0 1.0
2531
+ 9 36 0 0.530516 0.0 1.0 0.0,
2532
+ 'DF_1': id admitted gpa stats programming masters
2533
+ 0 4 1 0.765258 0.5 1.0 1.0
2534
+ 1 6 1 0.765258 0.5 0.0 1.0
2535
+ 2 7 1 0.215962 1.0 1.0 1.0
2536
+ 3 8 1 0.812207 0.5 0.0 0.0
2537
+ 4 10 1 0.863850 0.0 0.0 0.0
2538
+ 5 11 1 0.591549 0.0 0.0 0.0
2539
+ 6 9 1 0.915493 0.0 0.0 0.0
2540
+ 7 5 0 0.737089 1.0 1.0 0.0
2541
+ 8 3 1 0.859155 1.0 0.5 0.0
2542
+ 9 2 0 0.887324 0.5 0.5 1.0,
2543
+ 'DF_2': id admitted gpa stats programming masters
2544
+ 0 23 1 0.807512 0.0 1.0 1.0
2545
+ 1 25 1 0.981221 0.0 0.0 0.0
2546
+ 2 26 1 0.798122 0.0 0.0 1.0
2547
+ 3 27 0 0.981221 0.0 0.0 1.0
2548
+ 4 29 0 1.000000 1.0 0.5 1.0
2549
+ 5 30 0 0.901408 0.0 1.0 1.0
2550
+ 6 28 1 0.967136 0.0 0.0 0.0
2551
+ 7 24 1 0.000000 0.0 1.0 0.0
2552
+ 8 22 0 0.746479 1.0 0.5 1.0
2553
+ 9 21 1 0.938967 1.0 0.5 0.0}
2554
+ """
2555
+ if self.__is_trainable:
2556
+ # "data" argument is a required argument for model trainer function
2557
+ # when data argument is not passed with hyperparameters. On other side,
2558
+ # "data" argument will be optional argument when data argument
2559
+ # is passed with hyperparameters.
2560
+ _is_optional_arg = self.__model_trainer_input_data is not None
2561
+ # validate the model trainer function 'data' argument.
2562
+ self.__validate_model_trainer_input_data_argument(data, _is_optional_arg)
2563
+
2564
+ if not data is None:
2565
+ # '__model_trainer_input_data' is assigned with "data" argument,
2566
+ # when user passes data argument in fit() method.
2567
+ # Note: if user attempts to pass data argument in both "params"
2568
+ # argument as hyperparameters or "data" argument in fit()
2569
+ # method, then latest "data" argument value is considered
2570
+ # for model training.
2571
+ self.__model_trainer_input_data = data
2572
+
2573
+ if self.__is_trainable and self.__is_evaluatable and self.__is_sqle_function:
2574
+ self._labeled_data = self._add_data_label()
2575
+ elif self.__is_trainable and self.__is_evaluatable and not self.__is_clustering_model:
2576
+ self._labeled_data = self._add_data_label()
2577
+
2578
+
2579
+ class GridSearch(_BaseSearch):
2580
+ def __init__(self, func, params):
2581
+ """
2582
+ DESCRIPTION:
2583
+ GridSearch is an exhaustive search algorithm that covers all possible
2584
+ parameter values to identify optimal hyperparameters. It works for
2585
+ teradataml analytic functions from SQLE, BYOM, VAL and UAF features.
2586
+ teradataml GridSearch allows user to perform hyperparameter tuning for
2587
+ all model trainer and non-model trainer functions.
2588
+ When used for model trainer functions:
2589
+ * Based on evaluation metrics search determines best model.
2590
+ * All methods and properties can be used.
2591
+ When used for non-model trainer functions:
2592
+ * Only fit() method is supported.
2593
+ * User can choose the best output as they see fit to use this.
2594
+
2595
+ teradataml GridSearch also allows user to use input data as the
2596
+ hyperparameter. This option can be suitable when the user wants to
2597
+ identify the best models for a set of input data. When user passes
2598
+ set of data as hyperparameter for model trainer function, the search
2599
+ determines the best data along with the best model based on the
2600
+ evaluation metrics.
2601
+ Note:
2602
+ * configure.temp_object_type="VT" follows sequential execution.
2603
+
2604
+ PARAMETERS:
2605
+ func:
2606
+ Required Argument.
2607
+ Specifies a teradataml analytic function from SQLE, VAL, and UAF.
2608
+ Types:
2609
+ teradataml Analytic Functions
2610
+ * Advanced analytic functions
2611
+ * UAF
2612
+ * VAL
2613
+ Refer to display_analytic_functions() function for list of functions.
2614
+
2615
+ params:
2616
+ Required Argument.
2617
+ Specifies the parameter(s) of a teradataml analytic function.
2618
+ The parameter(s) must be in dictionary. keys refers to the
2619
+ argument names and values refers to argument values for corresponding
2620
+ arguments.
2621
+ Notes:
2622
+ * One can specify the argument value in a tuple to run HPT
2623
+ with different arguments.
2624
+ * Model trainer function arguments "id_column", "input_columns",
2625
+ and "target_columns" must be passed in fit() method.
2626
+ * All required arguments of non-model trainer function must
2627
+ be passed while GridSearch object creation.
2628
+ Types: dict
2629
+
2630
+ RETURNS:
2631
+ None
2632
+
2633
+ RAISES:
2634
+ TeradataMlException, TypeError, ValueError
2635
+
2636
+ EXAMPLES:
2637
+ >>> # Example 1: Model trainer function. Performing hyperparameter-tuning
2638
+ >>> # on SVM model trainer function.
2639
+
2640
+ >>> # Load the example data.
2641
+ >>> load_example_data("teradataml", ["cal_housing_ex_raw"])
2642
+
2643
+ >>> # Create teradataml DataFrame objects.
2644
+ >>> data_input = DataFrame.from_table("cal_housing_ex_raw")
2645
+
2646
+ >>> # Scale "target_columns" with respect to 'STD' value of the column.
2647
+ >>> fit_obj = ScaleFit(data=data_input,
2648
+ target_columns=['MedInc', 'HouseAge', 'AveRooms',
2649
+ 'AveBedrms', 'Population', 'AveOccup',
2650
+ 'Latitude', 'Longitude'],
2651
+ scale_method="STD")
2652
+
2653
+ >>> # Transform the data.
2654
+ >>> transform_obj = ScaleTransform(data=data_input,
2655
+ object=fit_obj.output,
2656
+ accumulate=["id", "MedHouseVal"])
2657
+
2658
+ >>> # Define parameter space for model training.
2659
+ >>> params = {"input_columns":['MedInc', 'HouseAge', 'AveRooms',
2660
+ 'AveBedrms', 'Population', 'AveOccup',
2661
+ 'Latitude', 'Longitude'],
2662
+ "response_column":"MedHouseVal",
2663
+ "model_type":"regression",
2664
+ "batch_size":(11, 50, 75),
2665
+ "iter_max":(100, 301),
2666
+ "lambda1":0.1,
2667
+ "alpha":0.5,
2668
+ "iter_num_no_change":60,
2669
+ "tolerance":0.01,
2670
+ "intercept":False,
2671
+ "learning_rate":"INVTIME",
2672
+ "initial_data":0.5,
2673
+ "decay_rate":0.5,
2674
+ "momentum":0.6,
2675
+ "nesterov":True,
2676
+ "local_sgd_iterations":1}
2677
+
2678
+ >>> # Required argument for model prediction and evaluation.
2679
+ >>> eval_params = {"id_column": "id",
2680
+ "accumulate": "MedHouseVal"}
2681
+
2682
+ >>> # Import trainer function and optimizer.
2683
+ >>> from teradataml import SVM, GridSearch
2684
+
2685
+ >>> # Initialize the GridSearch optimizer with model trainer
2686
+ >>> # function and parameter space required for model training.
2687
+ >>> gs_obj = GridSearch(func=SVM, params=params)
2688
+
2689
+ >>> # Perform model optimization for SVM function.
2690
+ >>> # Evaluation and prediction arguments are passed along with
2691
+ >>> # training dataframe.
2692
+ >>> gs_obj.fit(data=transform_obj.result, **eval_params)
2693
+
2694
+ >>> # View trained models.
2695
+ >>> gs_obj.models
2696
+ MODEL_ID DATA_ID PARAMETERS STATUS MAE
2697
+ 0 SVM_3 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.616772
2698
+ 1 SVM_0 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.660815
2699
+ 2 SVM_1 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.660815
2700
+ 3 SVM_2 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.616772
2701
+ 4 SVM_4 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.616772
2702
+ 5 SVM_5 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.616772
2703
+
2704
+ >>> # View model evaluation stats.
2705
+ >>> gs_obj.model_stats
2706
+ MODEL_ID DATA_ID PARAMETERS STATUS MAE
2707
+ 0 SVM_3 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.616772`
2708
+ 1 SVM_0 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.660815
2709
+ 2 SVM_1 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.660815
2710
+ 3 SVM_2 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.616772
2711
+ 4 SVM_4 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.616772
2712
+ 5 SVM_5 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.616772`
2713
+
2714
+ >>> # View best data, model ID and score.
2715
+ >>> print("Best data ID: ", gs_obj.best_data_id)
2716
+ Best data ID: DF_0
2717
+ >>> print("Best model ID: ", gs_obj.best_model_id)
2718
+ Best model ID: SVM_3
2719
+ >>> print("Best model score: ",gs_obj.best_score_)
2720
+ Best model score: 2.616772068334627
2721
+
2722
+ >>> # Performing prediction on sampled data using best trained model.
2723
+ >>> test_data = transform_obj.result.iloc[:5]
2724
+ >>> gs_pred = gs_obj.predict(newdata=test_data, **eval_params)
2725
+ >>> print("Prediction result: \n", gs_pred.result)
2726
+ Prediction result:
2727
+ id prediction MedHouseVal
2728
+ 0 686 0.202843 1.578
2729
+ 1 2018 0.149868 0.578
2730
+ 2 1754 0.211870 1.651
2731
+ 3 670 0.192414 1.922
2732
+ 4 244 0.247545 1.117
2733
+
2734
+ >>> # Perform evaluation using best model.
2735
+ >>> gs_obj.evaluate()
2736
+ ############ result Output ############
2737
+ MAE MSE MSLE MAPE MPE RMSE RMSLE ME R2 EV MPD MGD
2738
+ 0 2.616772 8.814968 0.0 101.876866 101.876866 2.969001 0.0 5.342344 -4.14622 -0.14862 NaN NaN
2739
+
2740
+ >>> # Retrieve any trained model.
2741
+ >>> gs_obj.get_model("SVM_1")
2742
+ ############ output_data Output ############
2743
+
2744
+ iterNum loss eta bias
2745
+ 0 3 2.060386 0.028868 0.0
2746
+ 1 5 2.055509 0.022361 0.0
2747
+ 2 6 2.051982 0.020412 0.0
2748
+ 3 7 2.048387 0.018898 0.0
2749
+ 4 9 2.041521 0.016667 0.0
2750
+ 5 10 2.038314 0.015811 0.0
2751
+ 6 8 2.044882 0.017678 0.0
2752
+ 7 4 2.058757 0.025000 0.0
2753
+ 8 2 2.065932 0.035355 0.0
2754
+ 9 1 1.780877 0.050000 0.0
2755
+
2756
+
2757
+ ############ result Output ############
2758
+
2759
+ predictor estimate value
2760
+ attribute
2761
+ 7 Latitude 0.155095 None
2762
+ -9 Learning Rate (Initial) 0.050000 None
2763
+ -17 OneClass SVM NaN FALSE
2764
+ -14 Epsilon 0.100000 None
2765
+ 5 Population 0.000000 None
2766
+ -12 Nesterov NaN TRUE
2767
+ -5 BIC 73.297397 None
2768
+ -7 Alpha 0.500000 Elasticnet
2769
+ -3 Number of Observations 55.000000 None
2770
+ 0 (Intercept) 0.000000 None
2771
+
2772
+ >>> # Update the default model.
2773
+ >>> gs_obj.set_model("SVM_1")
2774
+
2775
+
2776
+
2777
+ >>> # Example 2: Model trainer function. Performing hyperparameter-tuning
2778
+ >>> # on SVM model trainer function using unlabeled multiple-dataframe.
2779
+
2780
+ >>> # Slicing transformed dataframe into two part to present
2781
+ >>> # multiple-dataframe support.
2782
+
2783
+ >>> train_df_1 = transform_obj.result.iloc[:30]
2784
+ >>> train_df_2 = transform_obj.result.iloc[30:]
2785
+
2786
+ >>> # Initialize the GridSearch optimizer with model trainer
2787
+ >>> # function and parameter space required for model training.
2788
+ >>> gs_obj = GridSearch(func=SVM, params=params)
2789
+
2790
+ >>> # Perform model optimization for SVM function for
2791
+ >>> # unlabeled multiple-dataframe support.
2792
+ >>> # Evaluation and prediction arguments are passed along with
2793
+ >>> # training dataframe.
2794
+ >>> gs_obj.fit(data=(train_df_1, train_df_2), **eval_params)
2795
+
2796
+ >>> # View trained models.
2797
+ >>> gs_obj.models
2798
+ MODEL_ID DATA_ID PARAMETERS STATUS MAE
2799
+ 0 SVM_3 DF_1 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.650505
2800
+ 1 SVM_1 DF_1 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.650505
2801
+ 2 SVM_2 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.326521
2802
+ 3 SVM_0 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.326521
2803
+ 4 SVM_7 DF_1 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.650505
2804
+ 5 SVM_4 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.326521
2805
+ 6 SVM_6 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.326521
2806
+ 7 SVM_5 DF_1 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.650505
2807
+ 8 SVM_9 DF_1 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.650505
2808
+ 9 SVM_10 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.326521
2809
+ 10 SVM_11 DF_1 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.650505
2810
+ 11 SVM_8 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.326521
2811
+ >>> # View model evaluation stats.
2812
+ >>> gs_obj.model_stats
2813
+ MODEL_ID MAE MSE MSLE MAPE ... ME R2 EV MPD MGD
2814
+ 0 SVM_3 2.650505 8.459088 0.0 159.159527 ... 5.282729 -2.930531 0.333730 NaN NaN
2815
+ 1 SVM_1 2.650505 8.459088 0.0 159.159527 ... 5.282729 -2.930531 0.333730 NaN NaN
2816
+ 2 SVM_2 2.326521 6.218464 0.0 90.629648 ... 3.776410 -6.987358 -0.034968 NaN NaN
2817
+ 3 SVM_0 2.326521 6.218464 0.0 90.629648 ... 3.776410 -6.987358 -0.034968 NaN NaN
2818
+ 4 SVM_7 2.650505 8.459088 0.0 159.159527 ... 5.282729 -2.930531 0.333730 NaN NaN
2819
+ 5 SVM_4 2.326521 6.218464 0.0 90.629648 ... 3.776410 -6.987358 -0.034968 NaN NaN
2820
+ 6 SVM_6 2.326521 6.218464 0.0 90.629648 ... 3.776410 -6.987358 -0.034968 NaN NaN
2821
+ 7 SVM_5 2.650505 8.459088 0.0 159.159527 ... 5.282729 -2.930531 0.333730 NaN NaN
2822
+ 8 SVM_9 2.650505 8.459088 0.0 159.159527 ... 5.282729 -2.930531 0.333730 NaN NaN
2823
+ 9 SVM_10 2.326521 6.218464 0.0 90.629648 ... 3.776410 -6.987358 -0.034968 NaN NaN
2824
+ 10 SVM_11 2.650505 8.459088 0.0 159.159527 ... 5.282729 -2.930531 0.333730 NaN NaN
2825
+ 11 SVM_8 2.326521 6.218464 0.0 90.629648 ... 3.776410 -6.987358 -0.034968 NaN NaN
2826
+
2827
+
2828
+ >>> # View best data, model ID and score.
2829
+ >>> print("Best data ID: ", gs_obj.best_data_id)
2830
+ Best data ID: DF_0
2831
+ >>> print("Best model ID: ", gs_obj.best_model_id)
2832
+ Best model ID: SVM_2
2833
+ >>> print("Best model score: ",gs_obj.best_score_)
2834
+ Best model score: 2.3265213466885375
2835
+
2836
+ >>> # Performing prediction on sampled data using best trained model.
2837
+ >>> test_data = transform_obj.result.iloc[:5]
2838
+ >>> gs_pred = gs_obj.predict(newdata=test_data, **eval_params)
2839
+ >>> print("Prediction result: \n", gs_pred.result)
2840
+ Prediction result:
2841
+ id prediction MedHouseVal
2842
+ 0 686 -0.214558 1.578
2843
+ 1 2018 0.224954 0.578
2844
+ 2 1754 -0.484374 1.651
2845
+ 3 670 -0.288802 1.922
2846
+ 4 244 -0.097476 1.117
2847
+
2848
+ >>> # Perform evaluation using best model.
2849
+ >>> gs_obj.evaluate()
2850
+ ############ result Output ############
2851
+
2852
+ MAE MSE MSLE MAPE MPE RMSE RMSLE ME R2 EV MPD MGD
2853
+ 0 2.326521 6.218464 0.0 90.629648 90.629648 2.493685 0.0 3.77641 -6.987358 -0.034968 NaN NaN
2854
+
2855
+
2856
+ >>> # Retrieve any trained model.
2857
+ >>> gs_obj.get_model("SVM_1")
2858
+ ############ output_data Output ############
2859
+
2860
+ iterNum loss eta bias
2861
+ 0 3 2.078232 0.028868 0.0
2862
+ 1 5 2.049456 0.022361 0.0
2863
+ 2 6 2.037157 0.020412 0.0
2864
+ 3 7 2.028186 0.018898 0.0
2865
+ 4 9 2.012801 0.016667 0.0
2866
+ 5 10 2.007469 0.015811 0.0
2867
+ 6 8 2.020026 0.017678 0.0
2868
+ 7 4 2.063343 0.025000 0.0
2869
+ 8 2 2.092763 0.035355 0.0
2870
+ 9 1 2.112669 0.050000 0.0
2871
+
2872
+
2873
+ ############ result Output ############
2874
+
2875
+ predictor estimate value
2876
+ attribute
2877
+ 7 Latitude 0.077697 None
2878
+ -9 Learning Rate (Initial) 0.050000 None
2879
+ -17 OneClass SVM NaN FALSE
2880
+ -14 Epsilon 0.100000 None
2881
+ 5 Population -0.120322 None
2882
+ -12 Nesterov NaN TRUE
2883
+ -5 BIC 50.583018 None
2884
+ -7 Alpha 0.500000 Elasticnet
2885
+ -3 Number of Observations 31.000000 None
2886
+ 0 (Intercept) 0.000000 None
2887
+
2888
+
2889
+ >>> # Update the default model.
2890
+ >>> gs_obj.set_model("SVM_1")
2891
+
2892
+ >>> # Example 3: Model trainer function. Performing hyperparameter-tuning
2893
+ >>> # on SVM model trainer function using labeled multiple-dataframe.
2894
+
2895
+ >>> # Initialize the GridSearch optimizer with model trainer
2896
+ >>> # function and parameter space required for model training.
2897
+ >>> gs_obj = GridSearch(func=SVM, params=params)
2898
+
2899
+ >>> # Perform model optimization for SVM function for
2900
+ >>> # labeled multiple-dataframe support.
2901
+ >>> # Evaluation and prediction arguments are passed along with
2902
+ >>> # training dataframe.
2903
+ >>> gs_obj.fit(data={"Data-1":train_df_1, "Data-2":train_df_2}, **eval_params)
2904
+
2905
+ >>> # View trained models.
2906
+ >>> gs_obj.models
2907
+ MODEL_ID DATA_ID PARAMETERS STATUS MAE
2908
+ 0 SVM_1 Data-2 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.286463
2909
+ 1 SVM_3 Data-2 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.286463
2910
+ 2 SVM_2 Data-1 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.156109
2911
+ 3 SVM_0 Data-1 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.156109
2912
+ 4 SVM_7 Data-2 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.286463
2913
+ 5 SVM_4 Data-1 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.156109
2914
+ 6 SVM_5 Data-2 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.286463
2915
+ 7 SVM_6 Data-1 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.156109
2916
+ 8 SVM_10 Data-1 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.156109
2917
+ 9 SVM_8 Data-1 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.156109
2918
+ 10 SVM_9 Data-2 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.286463
2919
+ 11 SVM_11 Data-2 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.286463
2920
+
2921
+ >>> # View model evaluation stats.
2922
+ >>> gs_obj.model_stats
2923
+ MODEL_ID MAE MSE MSLE MAPE ... ME R2 EV MPD MGD
2924
+ 0 SVM_1 2.286463 5.721906 0.115319 120.188468 ... 3.280316 -3.436736 0.616960 NaN NaN
2925
+ 1 SVM_3 2.286463 5.721906 0.115319 120.188468 ... 3.280316 -3.436736 0.616960 NaN NaN
2926
+ 2 SVM_2 2.156109 6.986356 0.000000 97.766138 ... 4.737632 -2.195437 -0.235152 NaN NaN
2927
+ 3 SVM_0 2.156109 6.986356 0.000000 97.766138 ... 4.737632 -2.195437 -0.235152 NaN NaN
2928
+ 4 SVM_7 2.286463 5.721906 0.115319 120.188468 ... 3.280316 -3.436736 0.616960 NaN NaN
2929
+ 5 SVM_4 2.156109 6.986356 0.000000 97.766138 ... 4.737632 -2.195437 -0.235152 NaN NaN
2930
+ 6 SVM_5 2.286463 5.721906 0.115319 120.188468 ... 3.280316 -3.436736 0.616960 NaN NaN
2931
+ 7 SVM_6 2.156109 6.986356 0.000000 97.766138 ... 4.737632 -2.195437 -0.235152 NaN NaN
2932
+ 8 SVM_10 2.156109 6.986356 0.000000 97.766138 ... 4.737632 -2.195437 -0.235152 NaN NaN
2933
+ 9 SVM_8 2.156109 6.986356 0.000000 97.766138 ... 4.737632 -2.195437 -0.235152 NaN NaN
2934
+ 10 SVM_9 2.286463 5.721906 0.115319 120.188468 ... 3.280316 -3.436736 0.616960 NaN NaN
2935
+ 11 SVM_11 2.286463 5.721906 0.115319 120.188468 ... 3.280316 -3.436736 0.616960 NaN NaN
2936
+
2937
+ [12 rows x 13 columns]
2938
+
2939
+ >>> # View best data, model ID and score.
2940
+ >>> print("Best data ID: ", gs_obj.best_data_id)
2941
+ Best data ID: Data-1
2942
+ >>> print("Best model ID: ", gs_obj.best_model_id)
2943
+ Best model ID: SVM_2
2944
+ >>> print("Best model score: ",gs_obj.best_score_)
2945
+ Best model score: 2.156108718480682
2946
+
2947
+ >>> # Performing prediction on sampled data using best trained model.
2948
+ >>> test_data = transform_obj.result.iloc[:5]
2949
+ >>> gs_pred = gs_obj.predict(newdata=test_data, **eval_params)
2950
+ >>> print("Prediction result: \n", gs_pred.result)
2951
+ Prediction result:
2952
+ id prediction MedHouseVal
2953
+ 0 686 -0.512750 1.578
2954
+ 1 2018 0.065364 0.578
2955
+ 2 1754 -0.849449 1.651
2956
+ 3 670 -0.657097 1.922
2957
+ 4 244 -0.285946 1.117
2958
+
2959
+ >>> # Perform evaluation using best model.
2960
+ >>> gs_obj.evaluate()
2961
+ ############ result Output ############
2962
+
2963
+ MAE MSE MSLE MAPE MPE RMSE RMSLE ME R2 EV MPD MGD
2964
+ 0 2.156109 6.986356 0.0 97.766138 83.453982 2.643172 0.0 4.737632 -2.195437 -0.235152 NaN NaN
2965
+
2966
+ >>> # Retrieve any trained model.
2967
+ >>> gs_obj.get_model("SVM_1")
2968
+ ############ output_data Output ############
2969
+
2970
+ iterNum loss eta bias
2971
+ 0 3 2.238049 0.028868 0.0
2972
+ 1 5 2.198618 0.022361 0.0
2973
+ 2 6 2.183347 0.020412 0.0
2974
+ 3 7 2.171550 0.018898 0.0
2975
+ 4 9 2.154619 0.016667 0.0
2976
+ 5 10 2.147124 0.015811 0.0
2977
+ 6 8 2.162718 0.017678 0.0
2978
+ 7 4 2.217790 0.025000 0.0
2979
+ 8 2 2.257826 0.035355 0.0
2980
+ 9 1 2.286324 0.050000 0.0
2981
+
2982
+
2983
+ ############ result Output ############
2984
+
2985
+ predictor estimate value
2986
+ attribute
2987
+ -7 Alpha 0.500000 Elasticnet
2988
+ -3 Number of Observations 31.000000 None
2989
+ 5 Population -0.094141 None
2990
+ 0 (Intercept) 0.000000 None
2991
+ -17 OneClass SVM NaN FALSE
2992
+ -16 Kernel NaN LINEAR
2993
+ -1 Loss Function NaN EPSILON_INSENSITIVE
2994
+ 7 Latitude 0.169825 None
2995
+ -9 Learning Rate (Initial) 0.050000 None
2996
+ -14 Epsilon 0.100000 None
2997
+
2998
+ >>> # Update the default model.
2999
+ >>> gs_obj.set_model("SVM_1")
3000
+
3001
+
3002
+ >>> # Example 4: Model trainer function. Performing hyperparameter-tuning
3003
+ >>> # on SVM model trainer function by passing unlabeled
3004
+ >>> # multiple-dataframe as model hyperparameter.
3005
+
3006
+ >>> # Define parameter space for model training.
3007
+ >>> params = {"data":(train_df_1, train_df_2),
3008
+ "input_columns":['MedInc', 'HouseAge', 'AveRooms',
3009
+ 'AveBedrms', 'Population', 'AveOccup',
3010
+ 'Latitude', 'Longitude'],
3011
+ "response_column":"MedHouseVal",
3012
+ "model_type":"regression",
3013
+ "batch_size":(11, 50, 75),
3014
+ "iter_max":(100, 301),
3015
+ "lambda1":0.1,
3016
+ "alpha":0.5,
3017
+ "iter_num_no_change":60,
3018
+ "tolerance":0.01,
3019
+ "intercept":False,
3020
+ "learning_rate":"INVTIME",
3021
+ "initial_data":0.5,
3022
+ "decay_rate":0.5,
3023
+ "momentum":0.6,
3024
+ "nesterov":True,
3025
+ "local_sgd_iterations":1}
3026
+
3027
+ >>> # Initialize the GridSearch optimizer with model trainer
3028
+ >>> # function and parameter space required for model training.
3029
+ >>> gs_obj = GridSearch(func=SVM, params=params)
3030
+
3031
+ >>> # Perform model optimization for SVM function for
3032
+ >>> # labeled multiple-dataframe support.
3033
+ >>> # Evaluation and prediction arguments are passed along with
3034
+ >>> # training dataframe.
3035
+ >>> gs_obj.fit(**eval_params)
3036
+
3037
+ >>> # View trained models.
3038
+ >>> gs_obj.models
3039
+ MODEL_ID DATA_ID PARAMETERS STATUS MAE
3040
+ 0 SVM_0 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.007936
3041
+ 1 SVM_1 DF_1 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.517338
3042
+ 2 SVM_3 DF_1 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.517338
3043
+ 3 SVM_2 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.007936
3044
+ 4 SVM_5 DF_1 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.517338
3045
+ 5 SVM_7 DF_1 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.517338
3046
+ 6 SVM_6 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.007936
3047
+ 7 SVM_4 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.007936
3048
+ 8 SVM_9 DF_1 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.517338
3049
+ 9 SVM_8 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.007936
3050
+ 10 SVM_11 DF_1 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.517338
3051
+ 11 SVM_10 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS 2.007936
3052
+
3053
+ >>> # View model evaluation stats.
3054
+ >>> gs_obj.model_stats
3055
+ MODEL_ID MAE MSE MSLE MAPE ... ME R2 EV MPD MGD
3056
+ 0 SVM_0 2.007936 5.402427 0.007669 88.199346 ... 3.981598 -6.898063 -1.003772 NaN NaN
3057
+ 1 SVM_1 2.517338 7.470182 0.000000 118.722467 ... 4.035658 -7.827958 -0.716572 NaN NaN
3058
+ 2 SVM_3 2.517338 7.470182 0.000000 118.722467 ... 4.035658 -7.827958 -0.716572 NaN NaN
3059
+ 3 SVM_2 2.007936 5.402427 0.007669 88.199346 ... 3.981598 -6.898063 -1.003772 NaN NaN
3060
+ 4 SVM_5 2.517338 7.470182 0.000000 118.722467 ... 4.035658 -7.827958 -0.716572 NaN NaN
3061
+ 5 SVM_7 2.517338 7.470182 0.000000 118.722467 ... 4.035658 -7.827958 -0.716572 NaN NaN
3062
+ 6 SVM_6 2.007936 5.402427 0.007669 88.199346 ... 3.981598 -6.898063 -1.003772 NaN NaN
3063
+ 7 SVM_4 2.007936 5.402427 0.007669 88.199346 ... 3.981598 -6.898063 -1.003772 NaN NaN
3064
+ 8 SVM_9 2.517338 7.470182 0.000000 118.722467 ... 4.035658 -7.827958 -0.716572 NaN NaN
3065
+ 9 SVM_8 2.007936 5.402427 0.007669 88.199346 ... 3.981598 -6.898063 -1.003772 NaN NaN
3066
+ 10 SVM_11 2.517338 7.470182 0.000000 118.722467 ... 4.035658 -7.827958 -0.716572 NaN NaN
3067
+ 11 SVM_10 2.007936 5.402427 0.007669 88.199346 ... 3.981598 -6.898063 -1.003772 NaN NaN
3068
+
3069
+ [12 rows x 13 columns]
3070
+
3071
+ >>> # View best data, model ID and score.
3072
+ >>> print("Best data ID: ", gs_obj.best_data_id)
3073
+ Best data ID: DF_0
3074
+ >>> print("Best model ID: ", gs_obj.best_model_id)
3075
+ Best model ID: SVM_0
3076
+ >>> print("Best model score: ",gs_obj.best_score_)
3077
+ Best model score: 2.0079362549355104
3078
+
3079
+ >>> # Performing prediction on sampled data using best trained model.
3080
+ >>> test_data = transform_obj.result.iloc[:5]
3081
+ >>> gs_pred = gs_obj.predict(newdata=test_data, **eval_params)
3082
+ >>> print("Prediction result: \n", gs_pred.result)
3083
+ Prediction result:
3084
+ id prediction MedHouseVal
3085
+ 0 686 -0.365955 1.578
3086
+ 1 2018 0.411846 0.578
3087
+ 2 1754 -0.634807 1.651
3088
+ 3 670 -0.562927 1.922
3089
+ 4 244 -0.169730 1.117
3090
+ >>> # Perform evaluation using best model.
3091
+ >>> gs_obj.evaluate()
3092
+ ############ result Output ############
3093
+
3094
+ MAE MSE MSLE MAPE MPE RMSE RMSLE ME R2 EV MPD MGD
3095
+ 0 2.007936 5.402427 0.007669 88.199346 88.199346 2.324312 0.087574 3.981598 -6.898063 -1.003772 NaN NaN
3096
+
3097
+
3098
+ >>> # Retrieve any trained model.
3099
+ >>> gs_obj.get_model("SVM_1")
3100
+ ############ output_data Output ############
3101
+
3102
+ iterNum loss eta bias
3103
+ 0 3 2.154842 0.028868 0.0
3104
+ 1 5 2.129916 0.022361 0.0
3105
+ 2 6 2.118539 0.020412 0.0
3106
+ 3 7 2.107991 0.018898 0.0
3107
+ 4 9 2.089022 0.016667 0.0
3108
+ 5 10 2.080426 0.015811 0.0
3109
+ 6 8 2.098182 0.017678 0.0
3110
+ 7 4 2.142030 0.025000 0.0
3111
+ 8 2 2.168233 0.035355 0.0
3112
+ 9 1 2.186740 0.050000 0.0
3113
+
3114
+ ############ result Output ############
3115
+
3116
+ predictor estimate value
3117
+ attribute
3118
+ 7 Latitude 0.010463 None
3119
+ -9 Learning Rate (Initial) 0.050000 None
3120
+ -17 OneClass SVM NaN FALSE
3121
+ -14 Epsilon 0.100000 None
3122
+ 5 Population -0.348591 None
3123
+ -12 Nesterov NaN TRUE
3124
+ -5 BIC 50.585888 None
3125
+ -7 Alpha 0.500000 Elasticnet
3126
+ -3 Number of Observations 31.000000 None
3127
+ 0 (Intercept) 0.000000 None
3128
+
3129
+
3130
+ >>> # Update the default model.
3131
+ >>> gs_obj.set_model("SVM_1")
3132
+
3133
+ >>> # Example 5: Non-model trainer function. Performing GridSearch
3133
+ >>> # on the Antiselect non-model trainer function.
3135
+ >>> # Load the example dataset.
3136
+ >>> load_example_data("teradataml", "titanic")
3137
+
3138
+ >>> # Create teradataml dataframe.
3139
+ >>> titanic = DataFrame.from_table("titanic")
3140
+
3141
+ >>> # Define the non-model trainer function parameter space.
3142
+ >>> # Include input data in parameter space for non-model trainer function.
3143
+ >>> params = {"data":titanic, "exclude":(
3144
+ ['survived', 'name', 'age'],
3145
+ ["ticket", "parch", "sex", "age"])}
3146
+
3147
+ >>> # Import non-model trainer function and optimizer.
3148
+ >>> from teradataml import Antiselect, GridSearch
3149
+
3150
+ >>> # Initialize the GridSearch optimizer with non-model trainer
3151
+ >>> # function and parameter space required for non-model training.
3152
+ >>> gs_obj = GridSearch(func=Antiselect, params=params)
3153
+
3154
+ >>> # Perform execution of Antiselect function.
3155
+ >>> gs_obj.fit()
3156
+
3157
+ >>> # View trained models.
3158
+ >>> gs_obj.models
3159
+ MODEL_ID PARAMETERS STATUS
3160
+ 0 ANTISELECT_1 {'data': '"titanic"', 'exclude': ['ticket', 'p... PASS
3161
+ 1 ANTISELECT_0 {'data': '"titanic"', 'exclude': ['survived', ... PASS
3162
+
3163
+ >>> # Retrieve any trained model using "MODEL_ID".
3164
+ >>> gs_obj.get_model("ANTISELECT_1")
3165
+ ############ result Output ############
3166
+
3167
+ passenger survived pclass name sibsp fare cabin embarked
3168
+ 0 162 1 2 Watt, Mrs. James (Elizabeth "Bessie" Inglis Milne) 0 15.7500 None S
3169
+ 1 591 0 3 Rintamaki, Mr. Matti 0 7.1250 None S
3170
+ 2 387 0 3 Goodwin, Master. Sidney Leonard 5 46.9000 None S
3171
+ 3 469 0 3 Scanlan, Mr. James 0 7.7250 None Q
3172
+ 4 326 1 1 Young, Miss. Marie Grice 0 135.6333 C32 C
3173
+ 5 265 0 3 Henry, Miss. Delia 0 7.7500 None Q
3174
+ 6 530 0 2 Hocking, Mr. Richard George 2 11.5000 None S
3175
+ 7 244 0 3 Maenpaa, Mr. Matti Alexanteri 0 7.1250 None S
3176
+ 8 61 0 3 Sirayanian, Mr. Orsen 0 7.2292 None C
3177
+ 9 122 0 3 Moore, Mr. Leonard Charles 0 8.0500 None S
3178
+
3179
+ """
3180
+
3181
+ self.__params = params.copy()
3182
+ super().__init__(func=func, params=self.__params)
3183
+ # Populate parameter grid from provided parameter space.
3184
+ self.__populate_params_grid()
3185
+
3186
+
3187
+ def __populate_params_grid(self):
3188
+ """
3189
+ DESCRIPTION:
3190
+ Populate parameter grid based on the search algorithm. In GridSearch,
3191
+ populate all combinations of parameters.
3192
+
3193
+ PARAMETERS:
3194
+ None
3195
+
3196
+ RETURNS:
3197
+ None
3198
+
3199
+ RAISES:
3200
+ None
3201
+
3202
+ EXAMPLES:
3203
+ >>> self.__populate_params_grid()
3204
+ """
3205
+ # Populate all parameter combinations for given "params".
3206
+ # Since GridSearch works on all parameter combinations. Set
3207
+ # all the parameter combinations to the parameter grid.
3208
+ self._parameter_grid = self._BaseSearch__populate_parameter_grid()
3209
+
3210
+
3211
    def fit(self,
            data=None,
            evaluation_metric=None,
            early_stop=None,
            frac=0.8,
            run_parallel=True,
            wait=True,
            verbose=0,
            stratify_column=None,
            sample_id_column=None,
            sample_seed=None,
            max_time=None,
            **kwargs):
        """
        DESCRIPTION:
            Function to perform hyperparameter tuning using GridSearch algorithm.
            Notes:
                * In the Model trainer function, the best parameters are
                  selected based on training results.
                * In the Non model trainer function, First execution parameter
                  set is selected as the best parameters.

        PARAMETERS:
            data:
                Optional Argument.
                Specifies the input teradataml DataFrame for model trainer function.
                Notes:
                    * DataFrame need not to be passed in fit() methods, when "data" is
                      passed as a model hyperparameters ("params").
                    * "data" is a required argument for model trainer functions.
                    * "data" is ignored for non-model trainer functions.
                    * "data" can be contain single DataFrame or multiple DataFrame.
                    * One can pass multiple dataframes to "data". Hyperparameter
                      tuning is performed on all the dataframes for every model
                      parameter.
                    * "data" can be either a dictionary OR a tuple OR a dataframe.
                        * If it is a dictionary then Key represents the label for
                          dataframe and Value represents the dataframe.
                        * If it is a tuple then teradataml converts it to dictionary
                          by generating the labels internally.
                        * If it is a dataframe then teradataml label it as "DF_0".
                Types: teradataml DataFrame, dictionary, tuples

            evaluation_metric:
                Optional Argument.
                Specifies the evaluation metrics to considered for model
                evaluation.
                Notes:
                    * evaluation_metric applicable for model trainer functions.
                    * Best model is not selected when evaluation returns
                      non-finite values.
                    * MPD, MGD, RMSE, RMSLE are not supported for OpenSourceML models.
                Permitted Values:
                    * Classification: Accuracy, Micro-Precision, Micro-Recall,
                                      Micro-F1, Macro-Precision, Macro-Recall,
                                      Macro-F1, Weighted-Precision,
                                      Weighted-Recall,
                                      Weighted-F1.
                    * Regression: MAE, MSE, MSLE, MAPE, MPE, RMSE, RMSLE, ME,
                                  R2, EV, MPD, MGD

                Default Value:
                    * Classification: Accuracy
                    * Regression: MAE
                Types: str

            early_stop:
                Optional Argument.
                Specifies the early stop mechanism value for model trainer
                functions. Hyperparameter tuning ends model training when
                the training model evaluation metric attains "early_stop" value.
                Note:
                    * Early stopping supports only when evaluation returns
                      finite value.
                Types: int or float

            frac:
                Optional Argument.
                Specifies the split percentage of rows to be sampled for training
                and testing dataset. "frac" argument value must range between (0, 1).
                Notes:
                    * This "frac" argument is not supported for non-model trainer
                      function.
                    * The "frac" value is considered as train split percentage and
                      The remaining percentage is taken into account for test splitting.
                Default Value: 0.8
                Types: float

            run_parallel:
                Optional Argument.
                Specifies the parallel execution functionality of hyperparameter
                tuning. When "run_parallel" set to true, model functions are
                executed concurrently. Otherwise, model functions are executed
                sequentially.
                Default Value: True
                Types: bool

            wait:
                Optional Argument.
                Specifies whether to wait for the completion of execution
                of hyperparameter tuning or not. When set to False, hyperparameter
                tuning is executed in the background and user can use "is_running()"
                method to check the status. Otherwise it waits until the execution
                is complete to return the control back to user.
                Default Value: True
                Type: bool

            verbose:
                Optional Argument.
                Specifies whether to log the model training information and display
                the logs. When it is set to 1, progress bar alone logged in the
                console. When it is set to 2, along with progress bar, execution
                steps and execution time is logged in the console. When it is set
                to 0, nothing is logged in the console.
                Note:
                    * verbose is not significant when "wait" is 'False'.
                Default Value: 0
                Type: bool

            sample_seed:
                Optional Argument.
                Specifies the seed value that controls the shuffling applied
                to the data before applying the Train-Test split. Pass an int for
                reproducible output across multiple function calls.
                Notes:
                    * When the argument is not specified, different
                      runs of the query generate different outputs.
                    * It must be in the range [0, 2147483647]
                    * Seed is supported for stratify column.
                Types: int

            stratify_column:
                Optional Argument.
                Specifies column name that contains the labels indicating
                which data needs to be stratified for TrainTest split.
                Notes:
                    * seed is supported for stratify column.
                Types: str

            sample_id_column:
                Optional Argument.
                Specifies the input data column name that has the
                unique identifier for each row in the input.
                Note:
                    * Mandatory when "sample_seed" argument is present.
                Types: str

            max_time:
                Optional Argument.
                Specifies the maximum time for the completion of Hyperparameter tuning execution.
                Default Value: None
                Types: int or float

            kwargs:
                Optional Argument.
                Specifies the keyword arguments. Accepts additional arguments
                required for the teradataml analytic function.

        RETURNS:
            None

        RAISES:
            TeradataMlException, TypeError, ValueError

        EXAMPLES:
            >>> # Create an instance of the GridSearch algorithm called "optimizer_obj"
            >>> optimizer_obj = GridSearch(func=SVM, params=params)

            >>> eval_params = {"id_column": "id",
                               "accumulate": "MedHouseVal"}
            >>> # Example 1: Passing single DataFrame for model trainer function.
            >>> optimizer_obj.fit(data=train_df,
                                  evaluation_metric="MAE",
                                  early_stop=70.9,
                                  **eval_params)

            >>> # Example 2: Passing multiple datasets as tuple of DataFrames for
            >>> #            model trainer function.
            >>> optimizer_obj.fit(data=(train_df_1, train_df_2),
                                  evaluation_metric="MAE",
                                  early_stop=70.9,
                                  **eval_params)

            >>> # Example 3: Passing multiple datasets as dictionary of DataFrames
            >>> #            for model trainer function.
            >>> optimizer_obj.fit(data={"Data-1":train_df_1, "Data-2":train_df_2},
                                  evaluation_metric="MAE",
                                  early_stop=70.9,
                                  **eval_params)

            >>> # Example 4: No data argument passed in fit() method for model trainer function.
            >>> # Note: data argument must be passed while creating HPT object as
            >>> #       model hyperparameters.

            >>> # Define parameter space for model training with "data" argument.
            >>> params = {"data":(df1, df2),
                          "input_columns":['MedInc', 'HouseAge', 'AveRooms',
                                           'AveBedrms', 'Population', 'AveOccup',
                                           'Latitude', 'Longitude'],
                          "response_column":"MedHouseVal",
                          "model_type":"regression",
                          "batch_size":(11, 50, 75),
                          "iter_max":(100, 301),
                          "intercept":False,
                          "learning_rate":"INVTIME",
                          "nesterov":True,
                          "local_sgd_iterations":1}

            >>> # Create "optimizer_obj" using GridSearch algorithm and perform
            >>> # fit() method without any "data" argument for model trainer function.
            >>> optimizer_obj.fit(evaluation_metric="MAE",
                                  early_stop=70.9,
                                  **eval_params)

            >>> # Example 5: Do not pass data argument in fit() method for
            >>> #            non-model trainer function.
            >>> # Note: data argument must be passed while creating HPT
            >>> #       object as model hyperparameters.
            >>> optimizer_obj.fit()

            >>> # Example 6: Passing "verbose" argument value '1' in fit() method to
            >>> #            display model log.
            >>> optimizer_obj.fit(data=train_df, evaluation_metric="R2",
                                  verbose=1, **eval_params)
            completed: |████████████████████████████████████████████████████████████| 100% - 6/6

            >>> # Example 7: max_time argument is passed in fit() method.
            >>> # Model training parameters
            >>> model_params = {"input_columns":['sepal_length', 'sepal_width', 'petal_length', 'petal_width'],
            ...                 "response_column" :'species',
            ...                 "max_depth":(5,10,15),
            ...                 "lambda1" :(1000.0,0.001),
            ...                 "model_type" :"Classification",
            ...                 "seed":32,
            ...                 "shrinkage_factor":0.1,
            ...                 "iter_num":(5, 50)}
            >>>
            >>> eval_params = {"id_column": "id",
            ...                "accumulate":"species",
            ...                "model_type":'Classification',
            ...                "object_order_column":['task_index', 'tree_num', 'iter','class_num', 'tree_order']
                               }
            >>>
            >>> # Import model trainer function and optimizer.
            >>> from teradataml import XGBoost, GridSearch
            >>>
            >>> # Initialize the GridSearch optimizer with model trainer
            >>> # function and parameter space required for model training.
            >>> gs_obj = GridSearch(func=XGBoost, params=model_params)
            >>>
            >>> # fit() method with max_time argument(in seconds) for model trainer function.
            >>> gs_obj.fit(data=data, max_time=30, verbose=2, **eval_params)
            Model_id:XGBOOST_2 - Run time:33.277s - Status:PASS - ACCURACY:0.933
            Model_id:XGBOOST_3 - Run time:33.276s - Status:PASS - ACCURACY:0.933
            Model_id:XGBOOST_0 - Run time:33.279s - Status:PASS - ACCURACY:0.967
            Model_id:XGBOOST_1 - Run time:33.278s - Status:PASS - ACCURACY:0.933
            Computing: |⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾| 33% - 4/12
            >>>
            >>> # status 'SKIP' for the models which are not completed within the max_time.
            >>> gs_obj.models
                 MODEL_ID DATA_ID                                         PARAMETERS STATUS  ACCURACY
            0   XGBOOST_2    DF_0  {'input_columns': ['sepal_length', 'sepal_widt...   PASS  0.933333
            1   XGBOOST_4    DF_0  {'input_columns': ['sepal_length', 'sepal_widt...   SKIP       NaN
            2   XGBOOST_5    DF_0  {'input_columns': ['sepal_length', 'sepal_widt...   SKIP       NaN
            3   XGBOOST_6    DF_0  {'input_columns': ['sepal_length', 'sepal_widt...   SKIP       NaN
            4   XGBOOST_7    DF_0  {'input_columns': ['sepal_length', 'sepal_widt...   SKIP       NaN
            5   XGBOOST_8    DF_0  {'input_columns': ['sepal_length', 'sepal_widt...   SKIP       NaN
            6   XGBOOST_9    DF_0  {'input_columns': ['sepal_length', 'sepal_widt...   SKIP       NaN
            7  XGBOOST_10    DF_0  {'input_columns': ['sepal_length', 'sepal_widt...   SKIP       NaN
            8  XGBOOST_11    DF_0  {'input_columns': ['sepal_length', 'sepal_widt...   SKIP       NaN
            9   XGBOOST_3    DF_0  {'input_columns': ['sepal_length', 'sepal_widt...   PASS  0.933333
            10  XGBOOST_0    DF_0  {'input_columns': ['sepal_length', 'sepal_widt...   PASS  0.966667
            11  XGBOOST_1    DF_0  {'input_columns': ['sepal_length', 'sepal_widt...   PASS  0.933333
        """

        # Flag controlling whether invalid column parameters are discarded.
        # Read from kwargs (default False) so the public signature stays
        # unchanged; note it is left in kwargs and therefore also forwarded
        # to the base-class fit() below.
        self.discard_invalid_column_params = kwargs.get("discard_invalid_column_params", False)

        if self.discard_invalid_column_params:
            # Pre-compute the model trainer input data once using the
            # base-class helper.
            super()._setting_model_trainer_data(data)
            # Pre-compute the data mapping for the model trainer function.
            super()._data_mapping()
            # Shadow the base-class hooks with no-op lambdas on THIS
            # instance so the super().fit() call below finds no-op
            # versions and does not repeat the setup just performed.
            # NOTE(review): assumes _BaseSearch.fit() invokes these
            # attributes by name — confirm against _BaseSearch.
            self._setting_model_trainer_data = lambda data: None
            self._BaseSearch__update_model_parameters = lambda: None

        # Delegate the actual hyperparameter-tuning execution to the
        # _BaseSearch implementation of fit().
        super().fit(data, evaluation_metric,
                    early_stop, frac, run_parallel,
                    wait, verbose, stratify_column,
                    sample_id_column, sample_seed,
                    max_time, **kwargs)
3504
+
3505
+
3506
+ class RandomSearch(_BaseSearch):
3507
+ def __init__(self, func, params, n_iter=10, **kwargs):
3508
+ """
3509
+ DESCRIPTION:
3510
+ RandomSearch algorithm performs random sampling on hyperparameter
3511
+ space to identify optimal hyperparameters. It works for
3512
+ teradataml analytic functions from SQLE, BYOM, VAL and UAF features.
3513
+ teradataml RandomSearch allows user to perform hyperparameter tuning for
3514
+ all model trainer and non-model trainer functions.
3515
+ When used for model trainer functions:
3516
+ * Based on evaluation metrics search determines best model.
3517
+ * All methods and properties can be used.
3518
+ When used for non-model trainer functions:
3519
+ * Only fit() method is supported.
3520
+ * User can choose the best output as they see fit to use this.
3521
+
3522
+ teradataml RandomSearch also allows user to use input data as the
3523
+ hyperparameter. This option can be suitable when the user wants to
3524
+ identify the best models for a set of input data. When user passes
3525
+ set of data as hyperparameter for model trainer function, the search
3526
+ determines the best data along with the best model based on the
3527
+ evaluation metrics.
3528
+ Note:
3529
+ * configure.temp_object_type="VT" follows sequential execution.
3530
+
3531
+ PARAMETERS:
3532
+ func:
3533
+ Required Argument.
3534
+ Specifies a teradataml analytic function from SQLE, VAL, and UAF.
3535
+ Types:
3536
+ teradataml Analytic Functions
3537
+ * Advanced analytic functions
3538
+ * UAF
3539
+ * VAL
3540
+ Refer to display_analytic_functions() function for list of functions.
3541
+
3542
+ params:
3543
+ Required Argument.
3544
+ Specifies the parameter(s) of a teradataml analytic function.
3545
+ The parameter(s) must be in dictionary. keys refers to the
3546
+ argument names and values refers to argument values for corresponding
3547
+ arguments.
3548
+ Notes:
3549
+ * One can specify the argument value in a tuple to run HPT
3550
+ with different arguments.
3551
+ * Model trainer function arguments "id_column", "input_columns",
3552
+ and "target_columns" must be passed in fit() method.
3553
+ * All required arguments of non-model trainer function must be
3554
+ passed while RandomSearch object creation.
3555
+ Types: dict
3556
+
3557
+ n_iter:
3558
+ Optional Argument.
3559
+ Specifies the number of iterations random search need to be performed.
3560
+ Note:
3561
+ * n_iter must be less than the size of parameter populations.
3562
+ Default Value: 10
3563
+ Types: int
3564
+
3565
+ RETURNS:
3566
+ None
3567
+
3568
+ RAISES:
3569
+ TeradataMlException, TypeError, ValueError
3570
+
3571
+ EXAMPLES:
3572
+ >>> # Example 1: Model trainer function. Performing hyperparameter-tuning
3573
+ >>> # on SVM model trainer function using random search algorithm.
3574
+
3575
+ >>> # Load the example data.
3576
+ >>> load_example_data("teradataml", ["cal_housing_ex_raw"])
3577
+
3578
+ >>> # Create teradataml DataFrame objects.
3579
+ >>> data_input = DataFrame.from_table("cal_housing_ex_raw")
3580
+
3581
+ >>> # Scale "target_columns" with respect to 'STD' value of the column.
3582
+ >>> fit_obj = ScaleFit(data=data_input,
3583
+ target_columns=['MedInc', 'HouseAge', 'AveRooms',
3584
+ 'AveBedrms', 'Population', 'AveOccup',
3585
+ 'Latitude', 'Longitude'],
3586
+ scale_method="STD")
3587
+
3588
+ >>> # Transform the data.
3589
+ >>> transform_obj = ScaleTransform(data=data_input,
3590
+ object=fit_obj.output,
3591
+ accumulate=["id", "MedHouseVal"])
3592
+
3593
+ >>> # Define parameter space for model training.
3594
+ >>> # Note: These parameters create 6 models based on batch_size and iter_max.
3595
+ >>> params = {"input_columns":['MedInc', 'HouseAge', 'AveRooms',
3596
+ 'AveBedrms', 'Population', 'AveOccup',
3597
+ 'Latitude', 'Longitude'],
3598
+ "response_column":"MedHouseVal",
3599
+ "model_type":"regression",
3600
+ "batch_size":(11, 50, 75),
3601
+ "iter_max":(100, 301),
3602
+ "lambda1":0.1,
3603
+ "alpha":0.5,
3604
+ "iter_num_no_change":60,
3605
+ "tolerance":0.01,
3606
+ "intercept":False,
3607
+ "learning_rate":"INVTIME",
3608
+ "initial_data":0.5,
3609
+ "decay_rate":0.5,
3610
+ "momentum":0.6,
3611
+ "nesterov":True,
3612
+ "local_sgd_iterations":1}
3613
+
3614
+ >>> # Import trainer function and optimizer.
3615
+ >>> from teradataml import SVM, RandomSearch
3616
+
3617
+ >>> # Initialize the random search optimizer with model trainer
3618
+ >>> # function and parameter space required for model training.
3619
+ >>> rs_obj = RandomSearch(func=SVM, params=params, n_iter=3)
3620
+
3621
+ >>> # Perform model optimization for SVM function.
3622
+ >>> # Evaluation and prediction arguments are passed along with
3623
+ >>> # training dataframe.
3624
+ >>> rs_obj.fit(data=transform_obj.result, evaluation_metric="R2",
3625
+ id_column="id", verbose=1)
3626
+ completed: |████████████████████████████████████████████████████████████| 100% - 3/3
3627
+ >>> # View trained models.
3628
+ >>> rs_obj.models
3629
+ MODEL_ID DATA_ID PARAMETERS STATUS R2
3630
+ 0 SVM_2 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS -3.668091
3631
+ 1 SVM_1 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS -3.668091
3632
+ 2 SVM_0 DF_0 {'input_columns': ['MedInc', 'HouseAge', 'AveR... PASS -3.668091
3633
+
3634
+ >>> # View model evaluation stats.
3635
+ >>> rs_obj.model_stats
3636
+ MODEL_ID MAE MSE MSLE MAPE ... ME R2 EV MPD MGD
3637
+ 0 SVM_2 2.354167 6.715689 0.0 120.054758 ... 3.801619 -3.668091 0.184238 NaN NaN
3638
+ 1 SVM_1 2.354167 6.715689 0.0 120.054758 ... 3.801619 -3.668091 0.184238 NaN NaN
3639
+ 2 SVM_0 2.354167 6.715689 0.0 120.054758 ... 3.801619 -3.668091 0.184238 NaN NaN
3640
+
3641
+ [3 rows x 13 columns]
3642
+
3643
+ >>> # Performing prediction on sampled data using best trained model.
3644
+ >>> test_data = transform_obj.result.iloc[:5]
3645
+ >>> rs_pred = rs_obj.predict(newdata=test_data, id_column="id")
3646
+ >>> print("Prediction result: \n", rs_pred.result)
3647
+ Prediction result:
3648
+ id prediction
3649
+ 0 686 -0.024033
3650
+ 1 2018 -0.069738
3651
+ 2 1754 -0.117881
3652
+ 3 670 -0.021818
3653
+ 4 244 -0.187346
3654
+
3655
+ >>> # Perform evaluation using best model.
3656
+ >>> rs_obj.evaluate()
3657
+ ############ result Output ############
3658
+
3659
+ MAE MSE MSLE MAPE MPE RMSE RMSLE ME R2 EV MPD MGD
3660
+ 0 2.354167 6.715689 0.0 120.054758 120.054758 2.591465 0.0 3.801619 -3.668091 0.184238 NaN NaN
3661
+
3662
+ >>> # Retrieve any trained model.
3663
+ >>> rs_obj.get_model("SVM_1")
3664
+ ############ output_data Output ############
3665
+
3666
+ iterNum loss eta bias
3667
+ 0 3 2.012817 0.028868 0.0
3668
+ 1 5 2.010455 0.022361 0.0
3669
+ 2 6 2.009331 0.020412 0.0
3670
+ 3 7 2.008276 0.018898 0.0
3671
+ 4 9 2.006384 0.016667 0.0
3672
+ 5 10 2.005518 0.015811 0.0
3673
+ 6 8 2.007302 0.017678 0.0
3674
+ 7 4 2.011636 0.025000 0.0
3675
+ 8 2 2.014326 0.035355 0.0
3676
+ 9 1 2.016398 0.050000 0.0
3677
+
3678
+ ############ result Output ############
3679
+
3680
+ predictor estimate value
3681
+ attribute
3682
+ -7 Alpha 0.500000 Elasticnet
3683
+ -3 Number of Observations 55.000000 None
3684
+ 5 Population 0.000000 None
3685
+ 0 (Intercept) 0.000000 None
3686
+ -17 OneClass SVM NaN FALSE
3687
+ -16 Kernel NaN LINEAR
3688
+ -1 Loss Function NaN EPSILON_INSENSITIVE
3689
+ 7 Latitude -0.076648 None
3690
+ -9 Learning Rate (Initial) 0.050000 None
3691
+ -14 Epsilon 0.100000 None
3692
+
3693
+
3694
+ >>> # View best data, model ID, score and parameters.
3695
+ >>> print("Best data ID: ", rs_obj.best_data_id)
3696
+ Best data ID: DF_0
3697
+ >>> print("Best model ID: ", rs_obj.best_model_id)
3698
+ Best model ID: SVM_2
3699
+ >>> print("Best model score: ", rs_obj.best_score_)
3700
+ Best model score: -3.6680912444156455
3701
+ >>> print("Best model parameters: ", rs_obj.best_params_)
3702
+ Best model parameters: {'input_columns': ['MedInc', 'HouseAge', 'AveRooms',
3703
+ 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude'],
3704
+ 'response_column': 'MedHouseVal', 'model_type': 'regression',
3705
+ 'batch_size': 50, 'iter_max': 301, 'lambda1': 0.1, 'alpha': 0.5,
3706
+ 'iter_num_no_change': 60, 'tolerance': 0.01, 'intercept': False,
3707
+ 'learning_rate': 'INVTIME', 'initial_data': 0.5, 'decay_rate': 0.5,
3708
+ 'momentum': 0.6, 'nesterov': True, 'local_sgd_iterations': 1,
3709
+ 'data': '"ALICE"."ml__select__1696595493985650"'}
3710
+
3711
+ >>> # Update the default model.
3712
+ >>> rs_obj.set_model("SVM_1")
3713
+
3714
+ >>> # Example 2: Non-Model trainer function. Performing random search
3715
+ >>> # on AntiSelect model trainer function using random
3716
+ >>> # search algorithm.
3717
+
3718
+ >>> # Load the example dataset.
3719
+ >>> load_example_data("teradataml", "titanic")
3720
+
3721
+ >>> # Create teradaraml dataframe.
3722
+ >>> titanic = DataFrame.from_table("titanic")
3723
+
3724
+ >>> # Define the non-model trainer function parameter space.
3725
+ >>> # Include input data in parameter space for non-model trainer function.
3726
+ >>> # Note: These parameters creates two model hyperparameters.
3727
+ >>> params = {"data":titanic, "exclude":(['survived', 'age'],['age'],
3728
+ ['survived', 'name', 'age'],
3729
+ ['ticket'],['parch'],['sex','age'],
3730
+ ['survived'], ['ticket','parch'],
3731
+ ["ticket", "parch", "sex", "age"])}
3732
+
3733
+ >>> # Import non-model trainer function and optimizer.
3734
+ >>> from teradataml import Antiselect, RandomSearch
3735
+
3736
+ >>> # Initialize the random search optimizer with non-model trainer
3737
+ >>> # function and parameter space required for non-model training.
3738
+ >>> rs_obj = RandomSearch(func=Antiselect, params=params, n_iter=4)
3739
+
3740
+ >>> # Perform execution of Antiselect function.
3741
+ >>> rs_obj.fit()
3742
+
3743
+ >>> # Note: Since it is a non-model trainer function model ID, score
3744
+ >>> # and parameters are not applicable here.
3745
+ >>> # View trained models.
3746
+ >>> rs_obj.models
3747
+ MODEL_ID PARAMETERS STATUS
3748
+ 0 ANTISELECT_1 {'data': '"titanic"', 'exclude': ['survived', ... PASS
3749
+ 1 ANTISELECT_3 {'data': '"titanic"', 'exclude': ['ticket', 'p... PASS
3750
+ 2 ANTISELECT_2 {'data': '"titanic"', 'exclude': ['survived']} PASS
3751
+ 3 ANTISELECT_0 {'data': '"titanic"', 'exclude': ['sex', 'age']} PASS
3752
+
3753
+ >>> # Retrieve any trained model using "MODEL_ID".
3754
+ >>> rs_obj.get_model("ANTISELECT_0")
3755
+ ############ result Output ############
3756
+
3757
+ passenger survived pclass name sibsp parch ticket fare cabin embarked
3758
+ 0 162 1 2 Watt, Mrs. James (Elizabeth "Bessie" Inglis Milne) 0 0 C.A. 33595 15.7500 None S
3759
+ 1 591 0 3 Rintamaki, Mr. Matti 0 0 STON/O 2. 3101273 7.1250 None S
3760
+ 2 387 0 3 Goodwin, Master. Sidney Leonard 5 2 CA 2144 46.9000 None S
3761
+ 3 469 0 3 Scanlan, Mr. James 0 0 36209 7.7250 None Q
3762
+ 4 326 1 1 Young, Miss. Marie Grice 0 0 PC 17760 135.6333 C32 C
3763
+ 5 265 0 3 Henry, Miss. Delia 0 0 382649 7.7500 None Q
3764
+ 6 530 0 2 Hocking, Mr. Richard George 2 1 29104 11.5000 None S
3765
+ 7 244 0 3 Maenpaa, Mr. Matti Alexanteri 0 0 STON/O 2. 3101275 7.1250 None S
3766
+ 8 61 0 3 Sirayanian, Mr. Orsen 0 0 2669 7.2292 None C
3767
+ 9 122 0 3 Moore, Mr. Leonard Charles 0 0 A4. 54510 8.0500 None S
3768
+
3769
+ """
3770
+
3771
+ self.__params = params.copy()
3772
+ super().__init__(func=func, params=self.__params)
3773
+ # Validate argument 'n_iter'
3774
+ awu_matrix = []
3775
+ awu_matrix.append(["n_iter", n_iter, True, int])
3776
+ _Validators._validate_positive_int(n_iter, "n_iter")
3777
+ self.set_parameter_grid()
3778
+ parameter_space = self.get_parameter_grid()
3779
+ # Validates the range of n_iter should be greater than or equal to 1 and
3780
+ # less than or equal to parameter space.
3781
+ _Validators._validate_argument_range(n_iter, "n_iter", 1, len(parameter_space), True, True)
3782
+ self._n_iter = n_iter
3783
+
3784
+ def __populate_params_grid(self):
3785
+ """
3786
+ DESCRIPTION:
3787
+ Populate parameter grid based on the search algorithm. In random search,
3788
+ Random selection performed on given hyperparameters.
3789
+
3790
+ PARAMETERS:
3791
+ n_iter:
3792
+ Required Argument.
3793
+ Specifies number of parameters need to be sampled.
3794
+ Types: int
3795
+
3796
+ RETURNS:
3797
+ None
3798
+
3799
+ RAISES:
3800
+ TeradataMlException
3801
+
3802
+ EXAMPLES:
3803
+ >>> self.__populate_params_grid()
3804
+ """
3805
+ # Populate the parameter space with random and non-repetitive value
3806
+ if self.discard_invalid_column_params:
3807
+ # Defining the empty data_grouped_dict to group the parameters based on data_id.
3808
+ data_grouped_dict = defaultdict(list)
3809
+ for parameter in self._parameter_grid:
3810
+ # Extracting the data_id from the parameter.
3811
+ data_id = parameter['data_id']
3812
+ # Grouping the parameters based on data_id.
3813
+ data_grouped_dict[data_id].append(parameter)
3814
+ # Converting the grouped dictionary to list.
3815
+ data_grouped_dict = list(data_grouped_dict.values())
3816
+ parameter_grid = []
3817
+ for group in data_grouped_dict:
3818
+ # Randomly selecting the n_iter parameters from the grouped data.
3819
+ tmp = random.sample(group, self._n_iter)
3820
+ parameter_grid.extend(tmp)
3821
+
3822
+ # Setting the parameter grid.
3823
+ self._parameter_grid = parameter_grid
3824
+ else:
3825
+ self._parameter_grid = random.sample(self.get_parameter_grid(), self._n_iter)
3826
+
3827
+ def fit(self,
3828
+ data=None,
3829
+ evaluation_metric=None,
3830
+ early_stop=None,
3831
+ frac=0.8,
3832
+ run_parallel=True,
3833
+ wait=True,
3834
+ verbose=0,
3835
+ stratify_column=None,
3836
+ sample_id_column=None,
3837
+ sample_seed=None,
3838
+ max_time=None,
3839
+ **kwargs):
3840
+ """
3841
+ DESCRIPTION:
3842
+ Function to perform hyperparameter tuning using RandomSearch algorithm.
3843
+ Notes:
3844
+ * In the Model trainer function, the best parameters are
3845
+ selected based on training results.
3846
+ * In the Non model trainer function, First execution parameter
3847
+ set is selected as the best parameters.
3848
+
3849
+ PARAMETERS:
3850
+ data:
3851
+ Optional Argument.
3852
+ Specifies the input teradataml DataFrame for model trainer function.
3853
+ Notes:
3854
+ * DataFrame need not to be passed in fit() methods, when "data" is
3855
+ passed as a model hyperparameters ("params").
3856
+ * "data" is a required argument for model trainer functions.
3857
+ * "data" is ignored for non-model trainer functions.
3858
+ * "data" can be contain single DataFrame or multiple DataFrame.
3859
+ * One can pass multiple dataframes to "data". Hyperparameter
3860
+ tuning is performed on all the dataframes for every model
3861
+ parameter.
3862
+ * "data" can be either a dictionary OR a tuple OR a dataframe.
3863
+ * If it is a dictionary then Key represents the label for
3864
+ dataframe and Value represents the dataframe.
3865
+ * If it is a tuple then teradataml converts it to dictionary
3866
+ by generating the labels internally.
3867
+ * If it is a dataframe then teradataml label it as "DF_0".
3868
+ Types: teradataml DataFrame, dictionary, tuples
3869
+
3870
+ evaluation_metric:
3871
+ Optional Argument.
3872
+ Specifies the evaluation metrics to considered for model
3873
+ evaluation.
3874
+ Notes:
3875
+ * evaluation_metric applicable for model trainer functions.
3876
+ * Best model is not selected when evaluation returns
3877
+ non-finite values.
3878
+ * MPD, MGD, RMSE, RMSLE are not supported for OpenSourceML models.
3879
+ Permitted Values:
3880
+ * Classification: Accuracy, Micro-Precision, Micro-Recall,
3881
+ Micro-F1, Macro-Precision, Macro-Recall,
3882
+ Macro-F1, Weighted-Precision,
3883
+ Weighted-Recall,
3884
+ Weighted-F1.
3885
+ * Regression: MAE, MSE, MSLE, MAPE, MPE, RMSE, RMSLE, ME,
3886
+ R2, EV, MPD, MGD
3887
+
3888
+ Default Value:
3889
+ * Classification: Accuracy
3890
+ * Regression: MAE
3891
+ Types: str
3892
+
3893
+ early_stop:
3894
+ Optional Argument.
3895
+ Specifies the early stop mechanism value for model trainer
3896
+ functions. Hyperparameter tuning ends model training when
3897
+ the training model evaluation metric attains "early_stop" value.
3898
+ Note:
3899
+ * Early stopping supports only when evaluation returns
3900
+ finite value.
3901
+ Types: int or float
3902
+
3903
+ frac:
3904
+ Optional Argument.
3905
+ Specifies the split percentage of rows to be sampled for training
3906
+ and testing dataset. "frac" argument value must range between (0, 1).
3907
+ Notes:
3908
+ * This "frac" argument is not supported for non-model trainer
3909
+ function.
3910
+ * The "frac" value is considered as train split percentage and
3911
+ The remaining percentage is taken into account for test splitting.
3912
+ Default Value: 0.8
3913
+ Types: float
3914
+
3915
+ run_parallel:
3916
+ Optional Argument.
3917
+ Specifies the parallel execution functionality of hyperparameter
3918
+ tuning. When "run_parallel" set to true, model functions are
3919
+ executed concurrently. Otherwise, model functions are executed
3920
+ sequentially.
3921
+ Default Value: True
3922
+ Types: bool
3923
+
3924
+ wait:
3925
+ Optional Argument.
3926
+ Specifies whether to wait for the completion of execution
3927
+ of hyperparameter tuning or not. When set to False, hyperparameter
3928
+ tuning is executed in the background and user can use "is_running()"
3929
+ method to check the status. Otherwise it waits until the execution
3930
+ is complete to return the control back to user.
3931
+ Default Value: True
3932
+ Type: bool
3933
+
3934
+ verbose:
3935
+ Optional Argument.
3936
+ Specifies whether to log the model training information and display
3937
+ the logs. When it is set to 1, progress bar alone logged in the
3938
+ console. When it is set to 2, along with progress bar, execution
3939
+ steps and execution time is logged in the console. When it is set
3940
+ to 0, nothing is logged in the console.
3941
+ Note:
3942
+ * verbose is not significant when "wait" is 'False'.
3943
+ Default Value: 0
3944
+ Type: bool
3945
+
3946
+ sample_seed:
3947
+ Optional Argument.
3948
+ Specifies the seed value that controls the shuffling applied
3949
+ to the data before applying the Train-Test split. Pass an int for
3950
+ reproducible output across multiple function calls.
3951
+ Notes:
3952
+ * When the argument is not specified, different
3953
+ runs of the query generate different outputs.
3954
+ * It must be in the range [0, 2147483647]
3955
+ * Seed is supported for stratify column.
3956
+ Types: int
3957
+
3958
+ stratify_column:
3959
+ Optional Argument.
3960
+ Specifies column name that contains the labels indicating
3961
+ which data needs to be stratified for TrainTest split.
3962
+ Notes:
3963
+ * seed is supported for stratify column.
3964
+ Types: str
3965
+
3966
+ sample_id_column:
3967
+ Optional Argument.
3968
+ Specifies the input data column name that has the
3969
+ unique identifier for each row in the input.
3970
+ Note:
3971
+ * Mandatory when "sample_seed" argument is present.
3972
+ Types: str
3973
+
3974
+ max_time:
3975
+ Optional Argument.
3976
+ Specifies the maximum time for the completion of Hyperparameter tuning execution.
3977
+ Default Value: None
3978
+ Types: int or float
3979
+
3980
+ kwargs:
3981
+ Optional Argument.
3982
+ Specifies the keyword arguments. Accepts additional arguments
3983
+ required for the teradataml analytic function.
3984
+
3985
+ RETURNS:
3986
+ None
3987
+
3988
+ RAISES:
3989
+ TeradataMlException, TypeError, ValueError
3990
+
3991
+ EXAMPLES:
3992
+ >>> # Create an instance of the RandomSearch algorithm called "optimizer_obj"
3993
+ >>> optimizer_obj = RandomSearch(func=SVM, params=params, n_iter=3)
3994
+
3995
+ >>> eval_params = {"id_column": "id",
3996
+ "accumulate": "MedHouseVal"}
3997
+ >>> # Example 1: Passing single DataFrame for model trainer function.
3998
+ >>> optimizer_obj.fit(data=train_df,
3999
+ evaluation_metric="MAE",
4000
+ early_stop=70.9,
4001
+ **eval_params)
4002
+
4003
+ >>> # Example 2: Passing multiple datasets as tuple of DataFrames for
4004
+ >>> # model trainer function.
4005
+ >>> optimizer_obj.fit(data=(train_df_1, train_df_2),
4006
+ evaluation_metric="MAE",
4007
+ early_stop=70.9,
4008
+ **eval_params)
4009
+
4010
+ >>> # Example 3: Passing multiple datasets as dictionary of DataFrames
4011
+ >>> # for model trainer function.
4012
+ >>> optimizer_obj.fit(data={"Data-1":train_df_1, "Data-2":train_df_2},
4013
+ evaluation_metric="MAE",
4014
+ early_stop=70.9,
4015
+ **eval_params)
4016
+
4017
+ >>> # Example 4: No data argument passed in fit() method for model trainer function.
4018
+ >>> # Note: data argument must be passed while creating HPT object as
4019
+ >>> # model hyperparameters.
4020
+
4021
+ >>> # Define parameter space for model training with "data" argument.
4022
+ >>> params = {"data":(df1, df2),
4023
+ "input_columns":['MedInc', 'HouseAge', 'AveRooms',
4024
+ 'AveBedrms', 'Population', 'AveOccup',
4025
+ 'Latitude', 'Longitude'],
4026
+ "response_column":"MedHouseVal",
4027
+ "model_type":"regression",
4028
+ "batch_size":(11, 50, 75),
4029
+ "iter_max":(100, 301),
4030
+ "intercept":False,
4031
+ "learning_rate":"INVTIME",
4032
+ "nesterov":True,
4033
+ "local_sgd_iterations":1}
4034
+
4035
+ >>> # Create "optimizer_obj" using RandomSearch algorithm and perform
4036
+ >>> # fit() method without any "data" argument for model trainer function.
4037
+ >>> optimizer_obj.fit(evaluation_metric="MAE",
4038
+ early_stop=70.9,
4039
+ **eval_params)
4040
+
4041
+ >>> # Example 5: Do not pass data argument in fit() method for
4042
+ >>> # non-model trainer function.
4043
+ >>> # Note: data argument must be passed while creating HPT
4044
+ >>> # object as model hyperparameters.
4045
+ >>> optimizer_obj.fit()
4046
+
4047
+ >>> # Example 6: Passing "verbose" argument value '1' in fit() method to
4048
+ >>> # display model log.
4049
+ >>> optimizer_obj.fit(data=train_df, evaluation_metric="R2",
4050
+ verbose=1, **eval_params)
4051
+ completed: |████████████████████████████████████████████████████████████| 100% - 6/6
4052
+
4053
+ >>> # Example 7: max_time argument is passed in fit() method.
4054
+ >>> # Model training parameters
4055
+ >>> model_params = {"input_columns":['sepal_length', 'sepal_width', 'petal_length', 'petal_width'],
4056
+ ... "response_column" : 'species',
4057
+ ... "max_depth":(5,10,15),
4058
+ ... "lambda1" : (1000.0,0.001),
4059
+ ... "model_type" :"Classification",
4060
+ ... "seed":32,
4061
+ ... "shrinkage_factor":0.1,
4062
+ ... "iter_num":(5, 50)}
4063
+ >>>
4064
+ >>> eval_params = {"id_column": "id",
4065
+ ... "accumulate": "species",
4066
+ ... "model_type":'Classification',
4067
+ ... "object_order_column":['task_index', 'tree_num', 'iter','class_num', 'tree_order']
4068
+ ... }
4069
+ >>>
4070
+ >>> # Import model trainer and optimizer
4071
+ >>> from teradataml import XGBoost, RandomSearch
4072
+ >>>
4073
+ >>> # Initialize the RandomSearch optimizer with model trainer
4074
+ >>> # function and parameter space required for model training.
4075
+ >>> rs_obj = RandomSearch(func=XGBoost, params=model_params, n_iter=5)
4076
+ >>>
4077
+ >>> # fit() method with max_time argument(in seconds) for model trainer function.
4078
+ >>> rs_obj.fit(data=data, max_time=30, verbose=2, **eval_params)
4079
+ Model_id:XGBOOST_3 - Run time:28.292s - Status:PASS - ACCURACY:0.8
4080
+ Model_id:XGBOOST_0 - Run time:28.291s - Status:PASS - ACCURACY:0.867
4081
+ Model_id:XGBOOST_2 - Run time:28.289s - Status:PASS - ACCURACY:0.867
4082
+ Model_id:XGBOOST_1 - Run time:28.291s - Status:PASS - ACCURACY:0.867
4083
+ Computing: |⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫿⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾⫾| 80% - 4/5
4084
+ >>>
4085
+ >>> # status 'SKIP' for the models which are not completed within the max_time.
4086
+ >>> rs_obj.models
4087
+ MODEL_ID DATA_ID PARAMETERS STATUS ACCURACY
4088
+ 0 XGBOOST_3 DF_0 {'input_columns': ['sepal_length', 'sepal_widt... PASS 0.800000
4089
+ 1 XGBOOST_4 DF_0 {'input_columns': ['sepal_length', 'sepal_widt... SKIP NaN
4090
+ 2 XGBOOST_0 DF_0 {'input_columns': ['sepal_length', 'sepal_widt... PASS 0.866667
4091
+ 3 XGBOOST_2 DF_0 {'input_columns': ['sepal_length', 'sepal_widt... PASS 0.866667
4092
+ 4 XGBOOST_1 DF_0 {'input_columns': ['sepal_length', 'sepal_widt... PASS 0.866667
4093
+ """
4094
+
4095
+ # Set discard_invalid_column_params flag.
4096
+ self.discard_invalid_column_params =kwargs.get("discard_invalid_column_params", False)
4097
+
4098
+ if self.discard_invalid_column_params:
4099
+ # Setting model trainer input data
4100
+ super()._setting_model_trainer_data(data)
4101
+ # Mapping the data with input columns
4102
+ super()._data_mapping()
4103
+ # Setting the lambda function to None.
4104
+ self._setting_model_trainer_data = lambda data: None
4105
+ self._BaseSearch__update_model_parameters = lambda: None
4106
+
4107
+ # Populate parameter grid.
4108
+ self.__populate_params_grid()
4109
+
4110
+ # Calling baseSearch class fit method.
4111
+ super().fit(data, evaluation_metric, early_stop,
4112
+ frac, run_parallel, wait, verbose,
4113
+ stratify_column, sample_id_column,
4114
+ sample_seed, max_time, **kwargs)
4115
+