teradataml 17.20.0.6__py3-none-any.whl → 20.0.0.0__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic. Click here for more details.

Files changed (432)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +238 -1
  4. teradataml/__init__.py +13 -3
  5. teradataml/_version.py +1 -1
  6. teradataml/analytics/Transformations.py +4 -4
  7. teradataml/analytics/__init__.py +0 -2
  8. teradataml/analytics/analytic_function_executor.py +3 -0
  9. teradataml/analytics/json_parser/utils.py +13 -12
  10. teradataml/analytics/sqle/DecisionTreePredict.py +15 -30
  11. teradataml/analytics/sqle/NaiveBayesPredict.py +11 -20
  12. teradataml/analytics/sqle/__init__.py +0 -13
  13. teradataml/analytics/utils.py +1 -0
  14. teradataml/analytics/valib.py +3 -0
  15. teradataml/automl/__init__.py +1628 -0
  16. teradataml/automl/custom_json_utils.py +1270 -0
  17. teradataml/automl/data_preparation.py +993 -0
  18. teradataml/automl/data_transformation.py +727 -0
  19. teradataml/automl/feature_engineering.py +1648 -0
  20. teradataml/automl/feature_exploration.py +547 -0
  21. teradataml/automl/model_evaluation.py +163 -0
  22. teradataml/automl/model_training.py +887 -0
  23. teradataml/catalog/__init__.py +0 -2
  24. teradataml/catalog/byom.py +49 -6
  25. teradataml/catalog/function_argument_mapper.py +0 -2
  26. teradataml/catalog/model_cataloging_utils.py +2 -1021
  27. teradataml/common/aed_utils.py +6 -2
  28. teradataml/common/constants.py +50 -58
  29. teradataml/common/deprecations.py +160 -0
  30. teradataml/common/garbagecollector.py +61 -104
  31. teradataml/common/messagecodes.py +27 -36
  32. teradataml/common/messages.py +11 -15
  33. teradataml/common/utils.py +205 -287
  34. teradataml/common/wrapper_utils.py +1 -110
  35. teradataml/context/context.py +150 -78
  36. teradataml/data/bank_churn.csv +10001 -0
  37. teradataml/data/bmi.csv +501 -0
  38. teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +3 -3
  39. teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +6 -5
  40. teradataml/data/docs/sqle/docs_17_10/Fit.py +1 -1
  41. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +1 -1
  42. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +1 -1
  43. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +2 -2
  44. teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +2 -1
  45. teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +1 -0
  46. teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +1 -1
  47. teradataml/data/docs/sqle/docs_17_10/Transform.py +2 -1
  48. teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +3 -3
  49. teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +6 -5
  50. teradataml/data/docs/sqle/docs_17_20/Fit.py +1 -1
  51. teradataml/data/docs/sqle/docs_17_20/GLM.py +1 -1
  52. teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +9 -10
  53. teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +3 -2
  54. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +16 -15
  55. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +2 -2
  56. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +2 -2
  57. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +8 -8
  58. teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +21 -20
  59. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +1 -1
  60. teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +8 -3
  61. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +6 -5
  62. teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +6 -6
  63. teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +2 -1
  64. teradataml/data/docs/sqle/docs_17_20/SVM.py +1 -1
  65. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +16 -16
  66. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +1 -0
  67. teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +3 -2
  68. teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +4 -4
  69. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +19 -19
  70. teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +5 -4
  71. teradataml/data/docs/sqle/docs_17_20/Transform.py +2 -2
  72. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +9 -9
  73. teradataml/data/fish.csv +160 -0
  74. teradataml/data/glass_types.csv +215 -0
  75. teradataml/data/insurance.csv +1 -1
  76. teradataml/data/iris_data.csv +151 -0
  77. teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +1 -0
  78. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +1 -0
  79. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +1 -0
  80. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +1 -0
  81. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +1 -0
  82. teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +1 -0
  83. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +1 -0
  84. teradataml/data/load_example_data.py +3 -0
  85. teradataml/data/multi_model_classification.csv +401 -0
  86. teradataml/data/multi_model_regression.csv +401 -0
  87. teradataml/data/openml_example.json +63 -0
  88. teradataml/data/scripts/deploy_script.py +65 -0
  89. teradataml/data/scripts/mapper.R +20 -0
  90. teradataml/data/scripts/sklearn/__init__.py +0 -0
  91. teradataml/data/scripts/sklearn/sklearn_fit.py +175 -0
  92. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +135 -0
  93. teradataml/data/scripts/sklearn/sklearn_function.template +113 -0
  94. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +158 -0
  95. teradataml/data/scripts/sklearn/sklearn_neighbors.py +152 -0
  96. teradataml/data/scripts/sklearn/sklearn_score.py +128 -0
  97. teradataml/data/scripts/sklearn/sklearn_transform.py +179 -0
  98. teradataml/data/templates/open_source_ml.json +9 -0
  99. teradataml/data/teradataml_example.json +73 -1
  100. teradataml/data/test_classification.csv +101 -0
  101. teradataml/data/test_prediction.csv +101 -0
  102. teradataml/data/test_regression.csv +101 -0
  103. teradataml/data/train_multiclass.csv +101 -0
  104. teradataml/data/train_regression.csv +101 -0
  105. teradataml/data/train_regression_multiple_labels.csv +101 -0
  106. teradataml/data/wine_data.csv +1600 -0
  107. teradataml/dataframe/copy_to.py +79 -13
  108. teradataml/dataframe/data_transfer.py +8 -0
  109. teradataml/dataframe/dataframe.py +910 -311
  110. teradataml/dataframe/dataframe_utils.py +102 -5
  111. teradataml/dataframe/fastload.py +11 -3
  112. teradataml/dataframe/setop.py +15 -2
  113. teradataml/dataframe/sql.py +3735 -77
  114. teradataml/dataframe/sql_function_parameters.py +56 -5
  115. teradataml/dataframe/vantage_function_types.py +45 -1
  116. teradataml/dataframe/window.py +30 -29
  117. teradataml/dbutils/dbutils.py +18 -1
  118. teradataml/geospatial/geodataframe.py +18 -7
  119. teradataml/geospatial/geodataframecolumn.py +5 -0
  120. teradataml/hyperparameter_tuner/optimizer.py +910 -120
  121. teradataml/hyperparameter_tuner/utils.py +131 -37
  122. teradataml/lib/aed_0_1.dll +0 -0
  123. teradataml/lib/libaed_0_1.dylib +0 -0
  124. teradataml/lib/libaed_0_1.so +0 -0
  125. teradataml/libaed_0_1.dylib +0 -0
  126. teradataml/libaed_0_1.so +0 -0
  127. teradataml/opensource/__init__.py +1 -0
  128. teradataml/opensource/sklearn/__init__.py +1 -0
  129. teradataml/opensource/sklearn/_class.py +255 -0
  130. teradataml/opensource/sklearn/_sklearn_wrapper.py +1668 -0
  131. teradataml/opensource/sklearn/_wrapper_utils.py +268 -0
  132. teradataml/opensource/sklearn/constants.py +54 -0
  133. teradataml/options/__init__.py +3 -6
  134. teradataml/options/configure.py +21 -20
  135. teradataml/scriptmgmt/UserEnv.py +61 -5
  136. teradataml/scriptmgmt/lls_utils.py +135 -53
  137. teradataml/table_operators/Apply.py +38 -6
  138. teradataml/table_operators/Script.py +45 -308
  139. teradataml/table_operators/TableOperator.py +182 -591
  140. teradataml/table_operators/__init__.py +0 -1
  141. teradataml/table_operators/table_operator_util.py +32 -40
  142. teradataml/utils/validators.py +127 -3
  143. {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/METADATA +243 -3
  144. {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/RECORD +147 -391
  145. teradataml/analytics/mle/AdaBoost.py +0 -651
  146. teradataml/analytics/mle/AdaBoostPredict.py +0 -564
  147. teradataml/analytics/mle/Antiselect.py +0 -342
  148. teradataml/analytics/mle/Arima.py +0 -641
  149. teradataml/analytics/mle/ArimaPredict.py +0 -477
  150. teradataml/analytics/mle/Attribution.py +0 -1070
  151. teradataml/analytics/mle/Betweenness.py +0 -658
  152. teradataml/analytics/mle/Burst.py +0 -711
  153. teradataml/analytics/mle/CCM.py +0 -600
  154. teradataml/analytics/mle/CCMPrepare.py +0 -324
  155. teradataml/analytics/mle/CFilter.py +0 -460
  156. teradataml/analytics/mle/ChangePointDetection.py +0 -572
  157. teradataml/analytics/mle/ChangePointDetectionRT.py +0 -477
  158. teradataml/analytics/mle/Closeness.py +0 -737
  159. teradataml/analytics/mle/ConfusionMatrix.py +0 -420
  160. teradataml/analytics/mle/Correlation.py +0 -477
  161. teradataml/analytics/mle/Correlation2.py +0 -573
  162. teradataml/analytics/mle/CoxHazardRatio.py +0 -679
  163. teradataml/analytics/mle/CoxPH.py +0 -556
  164. teradataml/analytics/mle/CoxSurvival.py +0 -478
  165. teradataml/analytics/mle/CumulativeMovAvg.py +0 -363
  166. teradataml/analytics/mle/DTW.py +0 -623
  167. teradataml/analytics/mle/DWT.py +0 -564
  168. teradataml/analytics/mle/DWT2D.py +0 -599
  169. teradataml/analytics/mle/DecisionForest.py +0 -716
  170. teradataml/analytics/mle/DecisionForestEvaluator.py +0 -363
  171. teradataml/analytics/mle/DecisionForestPredict.py +0 -561
  172. teradataml/analytics/mle/DecisionTree.py +0 -830
  173. teradataml/analytics/mle/DecisionTreePredict.py +0 -528
  174. teradataml/analytics/mle/ExponentialMovAvg.py +0 -418
  175. teradataml/analytics/mle/FMeasure.py +0 -402
  176. teradataml/analytics/mle/FPGrowth.py +0 -734
  177. teradataml/analytics/mle/FrequentPaths.py +0 -695
  178. teradataml/analytics/mle/GLM.py +0 -558
  179. teradataml/analytics/mle/GLML1L2.py +0 -547
  180. teradataml/analytics/mle/GLML1L2Predict.py +0 -519
  181. teradataml/analytics/mle/GLMPredict.py +0 -529
  182. teradataml/analytics/mle/HMMDecoder.py +0 -945
  183. teradataml/analytics/mle/HMMEvaluator.py +0 -901
  184. teradataml/analytics/mle/HMMSupervised.py +0 -521
  185. teradataml/analytics/mle/HMMUnsupervised.py +0 -572
  186. teradataml/analytics/mle/Histogram.py +0 -561
  187. teradataml/analytics/mle/IDWT.py +0 -476
  188. teradataml/analytics/mle/IDWT2D.py +0 -493
  189. teradataml/analytics/mle/IdentityMatch.py +0 -763
  190. teradataml/analytics/mle/Interpolator.py +0 -918
  191. teradataml/analytics/mle/KMeans.py +0 -485
  192. teradataml/analytics/mle/KNN.py +0 -627
  193. teradataml/analytics/mle/KNNRecommender.py +0 -488
  194. teradataml/analytics/mle/KNNRecommenderPredict.py +0 -581
  195. teradataml/analytics/mle/LAR.py +0 -439
  196. teradataml/analytics/mle/LARPredict.py +0 -478
  197. teradataml/analytics/mle/LDA.py +0 -548
  198. teradataml/analytics/mle/LDAInference.py +0 -492
  199. teradataml/analytics/mle/LDATopicSummary.py +0 -464
  200. teradataml/analytics/mle/LevenshteinDistance.py +0 -450
  201. teradataml/analytics/mle/LinReg.py +0 -433
  202. teradataml/analytics/mle/LinRegPredict.py +0 -438
  203. teradataml/analytics/mle/MinHash.py +0 -544
  204. teradataml/analytics/mle/Modularity.py +0 -587
  205. teradataml/analytics/mle/NEREvaluator.py +0 -410
  206. teradataml/analytics/mle/NERExtractor.py +0 -595
  207. teradataml/analytics/mle/NERTrainer.py +0 -458
  208. teradataml/analytics/mle/NGrams.py +0 -570
  209. teradataml/analytics/mle/NPath.py +0 -634
  210. teradataml/analytics/mle/NTree.py +0 -549
  211. teradataml/analytics/mle/NaiveBayes.py +0 -462
  212. teradataml/analytics/mle/NaiveBayesPredict.py +0 -513
  213. teradataml/analytics/mle/NaiveBayesTextClassifier.py +0 -607
  214. teradataml/analytics/mle/NaiveBayesTextClassifier2.py +0 -531
  215. teradataml/analytics/mle/NaiveBayesTextClassifierPredict.py +0 -799
  216. teradataml/analytics/mle/NamedEntityFinder.py +0 -529
  217. teradataml/analytics/mle/NamedEntityFinderEvaluator.py +0 -414
  218. teradataml/analytics/mle/NamedEntityFinderTrainer.py +0 -396
  219. teradataml/analytics/mle/POSTagger.py +0 -417
  220. teradataml/analytics/mle/Pack.py +0 -411
  221. teradataml/analytics/mle/PageRank.py +0 -535
  222. teradataml/analytics/mle/PathAnalyzer.py +0 -426
  223. teradataml/analytics/mle/PathGenerator.py +0 -367
  224. teradataml/analytics/mle/PathStart.py +0 -464
  225. teradataml/analytics/mle/PathSummarizer.py +0 -470
  226. teradataml/analytics/mle/Pivot.py +0 -471
  227. teradataml/analytics/mle/ROC.py +0 -425
  228. teradataml/analytics/mle/RandomSample.py +0 -637
  229. teradataml/analytics/mle/RandomWalkSample.py +0 -490
  230. teradataml/analytics/mle/SAX.py +0 -779
  231. teradataml/analytics/mle/SVMDense.py +0 -677
  232. teradataml/analytics/mle/SVMDensePredict.py +0 -536
  233. teradataml/analytics/mle/SVMDenseSummary.py +0 -437
  234. teradataml/analytics/mle/SVMSparse.py +0 -557
  235. teradataml/analytics/mle/SVMSparsePredict.py +0 -553
  236. teradataml/analytics/mle/SVMSparseSummary.py +0 -435
  237. teradataml/analytics/mle/Sampling.py +0 -549
  238. teradataml/analytics/mle/Scale.py +0 -565
  239. teradataml/analytics/mle/ScaleByPartition.py +0 -496
  240. teradataml/analytics/mle/ScaleMap.py +0 -378
  241. teradataml/analytics/mle/ScaleSummary.py +0 -320
  242. teradataml/analytics/mle/SentenceExtractor.py +0 -363
  243. teradataml/analytics/mle/SentimentEvaluator.py +0 -432
  244. teradataml/analytics/mle/SentimentExtractor.py +0 -578
  245. teradataml/analytics/mle/SentimentTrainer.py +0 -405
  246. teradataml/analytics/mle/SeriesSplitter.py +0 -641
  247. teradataml/analytics/mle/Sessionize.py +0 -475
  248. teradataml/analytics/mle/SimpleMovAvg.py +0 -397
  249. teradataml/analytics/mle/StringSimilarity.py +0 -425
  250. teradataml/analytics/mle/TF.py +0 -389
  251. teradataml/analytics/mle/TFIDF.py +0 -504
  252. teradataml/analytics/mle/TextChunker.py +0 -414
  253. teradataml/analytics/mle/TextClassifier.py +0 -399
  254. teradataml/analytics/mle/TextClassifierEvaluator.py +0 -413
  255. teradataml/analytics/mle/TextClassifierTrainer.py +0 -565
  256. teradataml/analytics/mle/TextMorph.py +0 -494
  257. teradataml/analytics/mle/TextParser.py +0 -623
  258. teradataml/analytics/mle/TextTagger.py +0 -530
  259. teradataml/analytics/mle/TextTokenizer.py +0 -502
  260. teradataml/analytics/mle/UnivariateStatistics.py +0 -488
  261. teradataml/analytics/mle/Unpack.py +0 -526
  262. teradataml/analytics/mle/Unpivot.py +0 -438
  263. teradataml/analytics/mle/VarMax.py +0 -776
  264. teradataml/analytics/mle/VectorDistance.py +0 -762
  265. teradataml/analytics/mle/WeightedMovAvg.py +0 -400
  266. teradataml/analytics/mle/XGBoost.py +0 -842
  267. teradataml/analytics/mle/XGBoostPredict.py +0 -627
  268. teradataml/analytics/mle/__init__.py +0 -123
  269. teradataml/analytics/mle/json/adaboost_mle.json +0 -135
  270. teradataml/analytics/mle/json/adaboostpredict_mle.json +0 -85
  271. teradataml/analytics/mle/json/antiselect_mle.json +0 -34
  272. teradataml/analytics/mle/json/antiselect_mle_mle.json +0 -34
  273. teradataml/analytics/mle/json/arima_mle.json +0 -172
  274. teradataml/analytics/mle/json/arimapredict_mle.json +0 -52
  275. teradataml/analytics/mle/json/attribution_mle_mle.json +0 -143
  276. teradataml/analytics/mle/json/betweenness_mle.json +0 -97
  277. teradataml/analytics/mle/json/burst_mle.json +0 -140
  278. teradataml/analytics/mle/json/ccm_mle.json +0 -124
  279. teradataml/analytics/mle/json/ccmprepare_mle.json +0 -14
  280. teradataml/analytics/mle/json/cfilter_mle.json +0 -93
  281. teradataml/analytics/mle/json/changepointdetection_mle.json +0 -92
  282. teradataml/analytics/mle/json/changepointdetectionrt_mle.json +0 -78
  283. teradataml/analytics/mle/json/closeness_mle.json +0 -104
  284. teradataml/analytics/mle/json/confusionmatrix_mle.json +0 -79
  285. teradataml/analytics/mle/json/correlation_mle.json +0 -86
  286. teradataml/analytics/mle/json/correlationreduce_mle.json +0 -49
  287. teradataml/analytics/mle/json/coxhazardratio_mle.json +0 -89
  288. teradataml/analytics/mle/json/coxph_mle.json +0 -98
  289. teradataml/analytics/mle/json/coxsurvival_mle.json +0 -79
  290. teradataml/analytics/mle/json/cumulativemovavg_mle.json +0 -34
  291. teradataml/analytics/mle/json/decisionforest_mle.json +0 -167
  292. teradataml/analytics/mle/json/decisionforestevaluator_mle.json +0 -33
  293. teradataml/analytics/mle/json/decisionforestpredict_mle_mle.json +0 -74
  294. teradataml/analytics/mle/json/decisiontree_mle.json +0 -194
  295. teradataml/analytics/mle/json/decisiontreepredict_mle_mle.json +0 -86
  296. teradataml/analytics/mle/json/dtw_mle.json +0 -97
  297. teradataml/analytics/mle/json/dwt2d_mle.json +0 -116
  298. teradataml/analytics/mle/json/dwt_mle.json +0 -101
  299. teradataml/analytics/mle/json/exponentialmovavg_mle.json +0 -55
  300. teradataml/analytics/mle/json/fmeasure_mle.json +0 -58
  301. teradataml/analytics/mle/json/fpgrowth_mle.json +0 -159
  302. teradataml/analytics/mle/json/frequentpaths_mle.json +0 -129
  303. teradataml/analytics/mle/json/glm_mle.json +0 -111
  304. teradataml/analytics/mle/json/glml1l2_mle.json +0 -106
  305. teradataml/analytics/mle/json/glml1l2predict_mle.json +0 -57
  306. teradataml/analytics/mle/json/glmpredict_mle_mle.json +0 -74
  307. teradataml/analytics/mle/json/histogram_mle.json +0 -100
  308. teradataml/analytics/mle/json/hmmdecoder_mle.json +0 -192
  309. teradataml/analytics/mle/json/hmmevaluator_mle.json +0 -206
  310. teradataml/analytics/mle/json/hmmsupervised_mle.json +0 -91
  311. teradataml/analytics/mle/json/hmmunsupervised_mle.json +0 -114
  312. teradataml/analytics/mle/json/identitymatch_mle.json +0 -88
  313. teradataml/analytics/mle/json/idwt2d_mle.json +0 -73
  314. teradataml/analytics/mle/json/idwt_mle.json +0 -66
  315. teradataml/analytics/mle/json/interpolator_mle.json +0 -151
  316. teradataml/analytics/mle/json/kmeans_mle.json +0 -97
  317. teradataml/analytics/mle/json/knn_mle.json +0 -141
  318. teradataml/analytics/mle/json/knnrecommender_mle.json +0 -111
  319. teradataml/analytics/mle/json/knnrecommenderpredict_mle.json +0 -75
  320. teradataml/analytics/mle/json/lar_mle.json +0 -78
  321. teradataml/analytics/mle/json/larpredict_mle.json +0 -69
  322. teradataml/analytics/mle/json/lda_mle.json +0 -130
  323. teradataml/analytics/mle/json/ldainference_mle.json +0 -78
  324. teradataml/analytics/mle/json/ldatopicsummary_mle.json +0 -64
  325. teradataml/analytics/mle/json/levenshteindistance_mle.json +0 -92
  326. teradataml/analytics/mle/json/linreg_mle.json +0 -42
  327. teradataml/analytics/mle/json/linregpredict_mle.json +0 -56
  328. teradataml/analytics/mle/json/minhash_mle.json +0 -113
  329. teradataml/analytics/mle/json/modularity_mle.json +0 -91
  330. teradataml/analytics/mle/json/naivebayespredict_mle_mle.json +0 -85
  331. teradataml/analytics/mle/json/naivebayesreduce_mle.json +0 -52
  332. teradataml/analytics/mle/json/naivebayestextclassifierpredict_mle_mle.json +0 -147
  333. teradataml/analytics/mle/json/naivebayestextclassifiertrainer2_mle.json +0 -108
  334. teradataml/analytics/mle/json/naivebayestextclassifiertrainer_mle.json +0 -102
  335. teradataml/analytics/mle/json/namedentityfinder_mle.json +0 -84
  336. teradataml/analytics/mle/json/namedentityfinderevaluatorreduce_mle.json +0 -43
  337. teradataml/analytics/mle/json/namedentityfindertrainer_mle.json +0 -64
  338. teradataml/analytics/mle/json/nerevaluator_mle.json +0 -54
  339. teradataml/analytics/mle/json/nerextractor_mle.json +0 -87
  340. teradataml/analytics/mle/json/nertrainer_mle.json +0 -89
  341. teradataml/analytics/mle/json/ngrams_mle.json +0 -137
  342. teradataml/analytics/mle/json/ngramsplitter_mle_mle.json +0 -137
  343. teradataml/analytics/mle/json/npath@coprocessor_mle.json +0 -73
  344. teradataml/analytics/mle/json/ntree@coprocessor_mle.json +0 -123
  345. teradataml/analytics/mle/json/pack_mle.json +0 -58
  346. teradataml/analytics/mle/json/pack_mle_mle.json +0 -58
  347. teradataml/analytics/mle/json/pagerank_mle.json +0 -81
  348. teradataml/analytics/mle/json/pathanalyzer_mle.json +0 -63
  349. teradataml/analytics/mle/json/pathgenerator_mle.json +0 -40
  350. teradataml/analytics/mle/json/pathstart_mle.json +0 -62
  351. teradataml/analytics/mle/json/pathsummarizer_mle.json +0 -72
  352. teradataml/analytics/mle/json/pivoting_mle.json +0 -71
  353. teradataml/analytics/mle/json/postagger_mle.json +0 -51
  354. teradataml/analytics/mle/json/randomsample_mle.json +0 -131
  355. teradataml/analytics/mle/json/randomwalksample_mle.json +0 -85
  356. teradataml/analytics/mle/json/roc_mle.json +0 -73
  357. teradataml/analytics/mle/json/sampling_mle.json +0 -75
  358. teradataml/analytics/mle/json/sax_mle.json +0 -154
  359. teradataml/analytics/mle/json/scale_mle.json +0 -93
  360. teradataml/analytics/mle/json/scalebypartition_mle.json +0 -89
  361. teradataml/analytics/mle/json/scalemap_mle.json +0 -44
  362. teradataml/analytics/mle/json/scalesummary_mle.json +0 -14
  363. teradataml/analytics/mle/json/sentenceextractor_mle.json +0 -41
  364. teradataml/analytics/mle/json/sentimentevaluator_mle.json +0 -43
  365. teradataml/analytics/mle/json/sentimentextractor_mle.json +0 -100
  366. teradataml/analytics/mle/json/sentimenttrainer_mle.json +0 -68
  367. teradataml/analytics/mle/json/seriessplitter_mle.json +0 -133
  368. teradataml/analytics/mle/json/sessionize_mle_mle.json +0 -62
  369. teradataml/analytics/mle/json/simplemovavg_mle.json +0 -48
  370. teradataml/analytics/mle/json/stringsimilarity_mle.json +0 -50
  371. teradataml/analytics/mle/json/stringsimilarity_mle_mle.json +0 -50
  372. teradataml/analytics/mle/json/svmdense_mle.json +0 -165
  373. teradataml/analytics/mle/json/svmdensepredict_mle.json +0 -95
  374. teradataml/analytics/mle/json/svmdensesummary_mle.json +0 -58
  375. teradataml/analytics/mle/json/svmsparse_mle.json +0 -148
  376. teradataml/analytics/mle/json/svmsparsepredict_mle_mle.json +0 -103
  377. teradataml/analytics/mle/json/svmsparsesummary_mle.json +0 -57
  378. teradataml/analytics/mle/json/textchunker_mle.json +0 -40
  379. teradataml/analytics/mle/json/textclassifier_mle.json +0 -51
  380. teradataml/analytics/mle/json/textclassifierevaluator_mle.json +0 -43
  381. teradataml/analytics/mle/json/textclassifiertrainer_mle.json +0 -103
  382. teradataml/analytics/mle/json/textmorph_mle.json +0 -63
  383. teradataml/analytics/mle/json/textparser_mle.json +0 -166
  384. teradataml/analytics/mle/json/texttagger_mle.json +0 -81
  385. teradataml/analytics/mle/json/texttokenizer_mle.json +0 -91
  386. teradataml/analytics/mle/json/tf_mle.json +0 -33
  387. teradataml/analytics/mle/json/tfidf_mle.json +0 -34
  388. teradataml/analytics/mle/json/univariatestatistics_mle.json +0 -81
  389. teradataml/analytics/mle/json/unpack_mle.json +0 -91
  390. teradataml/analytics/mle/json/unpack_mle_mle.json +0 -91
  391. teradataml/analytics/mle/json/unpivoting_mle.json +0 -63
  392. teradataml/analytics/mle/json/varmax_mle.json +0 -176
  393. teradataml/analytics/mle/json/vectordistance_mle.json +0 -179
  394. teradataml/analytics/mle/json/weightedmovavg_mle.json +0 -48
  395. teradataml/analytics/mle/json/xgboost_mle.json +0 -178
  396. teradataml/analytics/mle/json/xgboostpredict_mle.json +0 -104
  397. teradataml/analytics/sqle/Antiselect.py +0 -321
  398. teradataml/analytics/sqle/Attribution.py +0 -603
  399. teradataml/analytics/sqle/DecisionForestPredict.py +0 -408
  400. teradataml/analytics/sqle/GLMPredict.py +0 -430
  401. teradataml/analytics/sqle/MovingAverage.py +0 -543
  402. teradataml/analytics/sqle/NGramSplitter.py +0 -548
  403. teradataml/analytics/sqle/NPath.py +0 -632
  404. teradataml/analytics/sqle/NaiveBayesTextClassifierPredict.py +0 -515
  405. teradataml/analytics/sqle/Pack.py +0 -388
  406. teradataml/analytics/sqle/SVMSparsePredict.py +0 -464
  407. teradataml/analytics/sqle/Sessionize.py +0 -390
  408. teradataml/analytics/sqle/StringSimilarity.py +0 -400
  409. teradataml/analytics/sqle/Unpack.py +0 -503
  410. teradataml/analytics/sqle/json/antiselect_sqle.json +0 -21
  411. teradataml/analytics/sqle/json/attribution_sqle.json +0 -92
  412. teradataml/analytics/sqle/json/decisionforestpredict_sqle.json +0 -48
  413. teradataml/analytics/sqle/json/glmpredict_sqle.json +0 -48
  414. teradataml/analytics/sqle/json/h2opredict_sqle.json +0 -63
  415. teradataml/analytics/sqle/json/movingaverage_sqle.json +0 -58
  416. teradataml/analytics/sqle/json/naivebayestextclassifierpredict_sqle.json +0 -76
  417. teradataml/analytics/sqle/json/ngramsplitter_sqle.json +0 -126
  418. teradataml/analytics/sqle/json/npath_sqle.json +0 -67
  419. teradataml/analytics/sqle/json/pack_sqle.json +0 -47
  420. teradataml/analytics/sqle/json/pmmlpredict_sqle.json +0 -55
  421. teradataml/analytics/sqle/json/sessionize_sqle.json +0 -43
  422. teradataml/analytics/sqle/json/stringsimilarity_sqle.json +0 -39
  423. teradataml/analytics/sqle/json/svmsparsepredict_sqle.json +0 -74
  424. teradataml/analytics/sqle/json/unpack_sqle.json +0 -80
  425. teradataml/catalog/model_cataloging.py +0 -980
  426. teradataml/config/mlengine_alias_definitions_v1.0 +0 -118
  427. teradataml/config/mlengine_alias_definitions_v1.1 +0 -127
  428. teradataml/config/mlengine_alias_definitions_v1.3 +0 -129
  429. teradataml/table_operators/sandbox_container_util.py +0 -643
  430. {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/WHEEL +0 -0
  431. {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/top_level.txt +0 -0
  432. {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/zip-safe +0 -0
@@ -0,0 +1,1628 @@
1
+ # ##################################################################
2
+ #
3
+ # Copyright 2024 Teradata. All rights reserved.
4
+ # TERADATA CONFIDENTIAL AND TRADE SECRET
5
+ #
6
+ # Primary Owner: Sweta Shaw
7
+ # Email Id: Sweta.Shaw@Teradata.com
8
+ #
9
+ # Secondary Owner: Akhil Bisht
10
+ # Email Id: AKHIL.BISHT@Teradata.com
11
+ #
12
+ # Version: 1.1
13
+ # Function Version: 1.0
14
+ # ##################################################################
15
+
16
+ # Python libraries
17
+ import json
18
+ import numpy as np
19
+ from sklearn.metrics import confusion_matrix
20
+ import time
21
+
22
+ # Teradata libraries
23
+ from teradataml.dataframe.copy_to import copy_to_sql
24
+ from teradataml import ColumnExpression
25
+ from teradataml.dataframe.dataframe import DataFrame
26
+ from teradataml.utils.validators import _Validators
27
+ from teradataml import ROC
28
+ from teradataml.common.utils import UtilFuncs
29
+ from teradataml.utils.dtypes import _Dtypes
30
+ from teradataml.common.utils import UtilFuncs
31
+ from teradataml import TeradataMlException
32
+ from teradataml.common.messages import Messages, MessageCodes
33
+
34
+ # AutoML Internal libraries
35
+ from teradataml.automl.data_preparation import _DataPreparation
36
+ from teradataml.automl.feature_engineering import _FeatureEngineering
37
+ from teradataml.automl.feature_exploration import _FeatureExplore, _is_terminal
38
+ from teradataml.automl.model_evaluation import _ModelEvaluator
39
+ from teradataml.automl.model_training import _ModelTraining
40
+ from teradataml.automl.data_transformation import _DataTransformation
41
+ from teradataml.automl.custom_json_utils import _GenerateCustomJson
42
+
43
+
44
class AutoML:

    # Model algorithms accepted by the "include" and "exclude" arguments.
    _SUPPORTED_MODELS = ["glm", "svm", "knn", "decision_forest", "xgboost"]

    # Stopping metrics permitted per task type.
    _REGRESSION_METRICS = ["R2", 'MAE', 'MSE', 'MSLE', 'RMSE', 'RMSLE']
    _CLASSIFICATION_METRICS = ["MICRO-F1", "MACRO-F1",
                               "MICRO-RECALL", "MACRO-RECALL",
                               "MICRO-PRECISION", "MACRO-PRECISION",
                               "WEIGHTED-PRECISION", "WEIGHTED-RECALL",
                               "WEIGHTED-F1", "ACCURACY"]

    def __init__(self,
                 task_type = "Default",
                 include = None,
                 exclude = None,
                 verbose = 0,
                 max_runtime_secs = None,
                 stopping_metric = None,
                 stopping_tolerance = None,
                 custom_config_file = None):
        """
        DESCRIPTION:
            AutoML (Automated Machine Learning) is an approach that automates the process
            of building, training, and validating machine learning models. It involves
            various algorithms to automate various aspects of the machine learning workflow,
            such as data preparation, feature engineering, model selection, hyperparameter
            tuning, and model deployment. It aims to simplify the process of building
            machine learning models, by automating some of the more time-consuming
            and labor-intensive tasks involved in the process.

            AutoML is designed to handle both regression and classification (binary and
            multiclass) tasks. User can specify the task type whether to apply
            regression OR classification algorithm on the provided dataset. By default, AutoML
            decides the task type.

            AutoML by default, trains using all model algorithms applicable for the
            task type problem. For example, "glm" and "svm" does not support multi-class
            classification problem. Thus, only 3 models are available to train in case
            of multi-class classification problem, by default. While for regression and
            binary classification problem, all 5 models i.e., "glm", "svm", "knn",
            "decision_forest", "xgboost" are available to train by default.

            AutoML provides functionality to use specific model algorithms for training.
            User can provide either include or exclude model. In case of include,
            only specified models are trained while for exclude, all models except
            specified model are trained.

            AutoML also provides an option to customize the processes within feature
            engineering, data preparation and model training phases. User can customize
            the processes by passing the JSON file path in case of custom run. It also
            supports early stopping of model training based on stopping metrics and
            maximum running time.

        PARAMETERS:
            task_type:
                Optional Argument.
                Specifies the task type for AutoML, whether to apply regression OR classification
                on the provided dataset. If user wants AutoML to decide the task type automatically,
                then it should be set to "Default".
                Default Value: "Default"
                Permitted Values: "Regression", "Classification", "Default"
                Types: str

            include:
                Optional Argument.
                Specifies the model algorithms to be used for model training phase.
                By default, all 5 models are used for training for regression and binary
                classification problem, while only 3 models are used for multi-class.
                Cannot be used together with "exclude".
                Permitted Values: "glm", "svm", "knn", "decision_forest", "xgboost"
                Types: str OR list of str

            exclude:
                Optional Argument.
                Specifies the model algorithms to be excluded from model training phase.
                No model is excluded by default.
                Cannot be used together with "include".
                Permitted Values: "glm", "svm", "knn", "decision_forest", "xgboost"
                Types: str OR list of str

            verbose:
                Optional Argument.
                Specifies the detailed execution steps based on verbose level.
                Default Value: 0
                Permitted Values:
                    * 0: prints the progress bar and leaderboard
                    * 1: prints the execution steps of AutoML.
                    * 2: prints the intermediate data between the execution of each step of AutoML.
                Types: int

            max_runtime_secs:
                Optional Argument.
                Specifies the time limit in seconds for model training.
                Types: int OR float

            stopping_metric:
                Required, when "stopping_tolerance" is set, otherwise optional.
                Specifies the stopping metrics for stopping tolerance in model training.
                Permitted Values:
                    * For task_type "Regression": "R2", "MAE", "MSE", "MSLE",
                                                  "RMSE", "RMSLE"
                    * For task_type "Classification": 'MICRO-F1','MACRO-F1',
                                                      'MICRO-RECALL','MACRO-RECALL',
                                                      'MICRO-PRECISION', 'MACRO-PRECISION',
                                                      'WEIGHTED-PRECISION','WEIGHTED-RECALL',
                                                      'WEIGHTED-F1', 'ACCURACY'
                Types: str

            stopping_tolerance:
                Required, when "stopping_metric" is set, otherwise optional.
                Specifies the stopping tolerance for stopping metrics in model training.
                Types: float OR int

            custom_config_file:
                Optional Argument.
                Specifies the path of JSON file in case of custom run.
                Types: str

        RETURNS:
            Instance of AutoML.

        RAISES:
            TeradataMlException, TypeError, ValueError

        EXAMPLES:
            # Notes:
            #     1. Get the connection to Vantage to execute the function.
            #     2. One must import the required functions mentioned in
            #        the example from teradataml.
            #     3. Function raises error if not supported on the Vantage
            #        user is connected to.

            # Load the example data.
            >>> load_example_data("GLMPredict", ["admissions_test", "admissions_train"])
            >>> load_example_data("decisionforestpredict", ["housing_train", "housing_test"])
            >>> load_example_data("teradataml", "iris_input")

            # Create teradataml DataFrames.
            >>> admissions_train = DataFrame.from_table("admissions_train")
            >>> admissions_test = DataFrame.from_table("admissions_test")
            >>> housing_train = DataFrame.from_table("housing_train")
            >>> housing_test = DataFrame.from_table("housing_test")
            >>> iris_input = DataFrame.from_table("iris_input")

            # Example 1: Run AutoML for classification problem.
            # Scenario: Predict whether a student will be admitted to a university
            #           based on different factors. Run AutoML to get the best
            #           performing model out of available models.

            # Create an instance of AutoML.
            >>> automl_obj = AutoML(task_type="Classification")

            # Fit the data.
            >>> automl_obj.fit(admissions_train, "admitted")

            # Run predict with best performing model.
            >>> prediction = automl_obj.predict()
            >>> prediction

            # Run predict for new test data with best performing model.
            >>> prediction = automl_obj.predict(admissions_test)
            >>> prediction

            # Run predict for new test data with second best performing model.
            >>> prediction = automl_obj.predict(admissions_test, rank=2)
            >>> prediction

            # Display leaderboard.
            >>> automl_obj.leaderboard()

            # Display best performing model.
            >>> automl_obj.leader()

            # Example 2 : Run AutoML for regression problem.
            # Scenario : Predict the price of house based on different factors.
            #            Run AutoML to get the best performing model using custom
            #            configuration file to customize different processes of
            #            AutoML Run. Use include to specify "xgboost" and
            #            "decision_forest" models to be used for training.

            # Generate custom JSON file
            >>> AutoML.generate_custom_config("custom_housing")

            # Create instance of AutoML.
            >>> automl_obj = AutoML(task_type="Regression",
            ...                     verbose=1,
            ...                     include=["decision_forest", "xgboost"],
            ...                     custom_config_file="custom_housing.json")
            # Fit the data.
            >>> automl_obj.fit(housing_train, "price")

            # Run predict with best performing model.
            >>> prediction = automl_obj.predict()
            >>> prediction

            # Run predict for new test data with best performing model.
            >>> prediction = automl_obj.predict(housing_test)
            >>> prediction

            # Run predict for new test data with second best performing model.
            >>> prediction = automl_obj.predict(housing_test, rank=2)
            >>> prediction

            # Display leaderboard.
            >>> automl_obj.leaderboard()

            # Display best performing model.
            >>> automl_obj.leader()

            # Example 3 : Run AutoML for multiclass classification problem.
            # Scenario : Predict the species of iris flower based on different
            #            factors. Use custom configuration file to customize
            #            different processes of AutoML Run to get the best
            #            performing model out of available models.

            # Generate custom JSON file
            >>> AutoML.generate_custom_config()

            # Create instance of AutoML.
            >>> automl_obj = AutoML(verbose=2,
            ...                     exclude="xgboost",
            ...                     custom_config_file="custom.json")
            # Fit the data.
            >>> automl_obj.fit(iris_input, iris_input.species)

            # Run predict with best performing model.
            >>> prediction = automl_obj.predict()
            >>> prediction

            # Run predict with second best performing model.
            >>> prediction = automl_obj.predict(rank=2)
            >>> prediction

            # Display leaderboard.
            >>> automl_obj.leaderboard()

            # Display best performing model.
            >>> automl_obj.leader()

            # Example 4 : Run AutoML for regression problem with early stopping metric and tolerance.
            # Scenario : Predict the price of house based on different factors.
            #            Use custom configuration file to customize different
            #            processes of AutoML Run. Define performance threshold
            #            to acquire for the available models, and terminate training
            #            upon meeting the stipulated performance criteria.

            # Generate custom JSON file
            >>> AutoML.generate_custom_config("custom_housing")

            # Create instance of AutoML.
            >>> automl_obj = AutoML(verbose=2,
            ...                     exclude="xgboost",
            ...                     stopping_metric="R2",
            ...                     stopping_tolerance=0.7,
            ...                     custom_config_file="custom_housing.json")
            # Fit the data.
            >>> automl_obj.fit(housing_train, "price")

            # Run predict with best performing model.
            >>> prediction = automl_obj.predict()
            >>> prediction

            # Display leaderboard.
            >>> automl_obj.leaderboard()

            # Example 5 : Run AutoML for multiclass classification problem with maximum runtime.
            # Scenario : Predict the species of iris flower based on different factors.
            #            Run AutoML to get the best performing model in specified time.

            # Create instance of AutoML.
            >>> automl_obj = AutoML(verbose=2,
            ...                     exclude="xgboost",
            ...                     max_runtime_secs=500)
            # Fit the data.
            >>> automl_obj.fit(iris_input, iris_input.species)

            # Run predict with best performing model.
            >>> prediction = automl_obj.predict()
            >>> prediction

            # Run predict with second best performing model.
            >>> prediction = automl_obj.predict(rank=2)
            >>> prediction

            # Display leaderboard.
            >>> automl_obj.leaderboard()

            # Display best performing model.
            >>> automl_obj.leader()
        """
        # Appending arguments to list for validation.
        # Each entry is passed as-is to _Validators._validate_function_arguments().
        arg_info_matrix = [
            ["task_type", task_type, True, (str), True,
             ["Regression", "Classification", "Default"]],
            ["include", include, True, (str, list), True, list(self._SUPPORTED_MODELS)],
            ["exclude", exclude, True, (str, list), True, list(self._SUPPORTED_MODELS)],
            ["verbose", verbose, True, (int), True, [0, 1, 2]],
            ["max_runtime_secs", max_runtime_secs, True, (int, float)],
            # The task type may only be known at fit() time, so metrics of both
            # task types are accepted here; fit() narrows the check down.
            ["stopping_metric", stopping_metric, True, (str), True,
             self._REGRESSION_METRICS + self._CLASSIFICATION_METRICS],
            ["stopping_tolerance", stopping_tolerance, True, (float, int)],
            ["custom_config_file", custom_config_file, True, (str), True],
        ]

        # Validate argument types
        _Validators._validate_function_arguments(arg_info_matrix)
        # Either include or exclude can be used.
        if include is not None or exclude is not None:
            _Validators._validate_mutually_exclusive_arguments(include, "include", exclude, "exclude")
        # Validate mutually inclusive arguments
        _Validators._validate_mutually_inclusive_arguments(stopping_metric, "stopping_metric",
                                                           stopping_tolerance, "stopping_tolerance")

        custom_data = None
        self.auto = True
        # Validate custom file
        if custom_config_file:
            # Performing validation
            _Validators._validate_file_exists(custom_config_file)
            _Validators._validate_file_extension(custom_config_file, "json")
            _Validators._check_empty_file(custom_config_file)
            # A custom configuration switches the run from auto mode to custom mode.
            self.auto = False
            # Loading file
            with open(custom_config_file, 'r') as json_file:
                custom_data = json.load(json_file)

        # Initializing class variables
        self.data = None
        self.target_column = None
        self.custom_data = custom_data
        self.task_type = task_type
        self.include_model = include
        self.exclude_model = exclude
        self.verbose = verbose
        self.max_runtime_secs = max_runtime_secs
        self.stopping_metric = stopping_metric
        self.stopping_tolerance = stopping_tolerance
        self.model_list = ['decision_forest', 'xgboost', 'knn', 'svm', 'glm']
        self._is_fit_called = False

    def is_classification_type(self):
        """
        DESCRIPTION:
            Function tells whether the current task type is classification.
            It is a regular method (instead of a lambda stored on the instance)
            so that copied instances evaluate their own "task_type" and the
            instance does not carry an unpicklable closure.

        RETURNS:
            bool, True when "task_type" is "Classification" (case-insensitive).
        """
        return self.task_type.upper() == 'CLASSIFICATION'

    @staticmethod
    def _resolve_model_list(model_list, include=None, exclude=None):
        """
        DESCRIPTION:
            Internal function computes the list of model algorithms to train.
            Names are lower-cased before they are compared, so a model passed
            as "XGBoost" is matched against "xgboost". The order of the input
            is preserved and duplicates are dropped.

        PARAMETERS:
            model_list:
                Required Argument.
                Specifies the candidate models used when "include" is empty.
                Types: list of str

            include:
                Optional Argument.
                Specifies the models to train instead of "model_list".
                Types: str OR list of str

            exclude:
                Optional Argument.
                Specifies the models to drop from the candidates.
                Types: str OR list of str

        RETURNS:
            list of str, lower-cased model names.
        """
        if isinstance(include, str):
            include = [include]
        if isinstance(exclude, str):
            exclude = [exclude]

        candidates = include if include else model_list
        excluded = {model.lower() for model in exclude} if exclude else set()

        resolved = []
        for model in candidates:
            name = model.lower()
            if name not in excluded and name not in resolved:
                resolved.append(name)
        return resolved

    def _ensure_fit_called(self, method_name, action):
        """
        DESCRIPTION:
            Internal function raises an error when a result is requested
            before a successful fit().

        PARAMETERS:
            method_name:
                Required Argument.
                Specifies the name of the calling method, used in the error message.
                Types: str

            action:
                Required Argument.
                Specifies what the caller was about to do, used in the error message.
                Types: str

        RAISES:
            TeradataMlException
        """
        if not getattr(self, "_is_fit_called", False):
            err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
                                       f"'{method_name}' method",
                                       f"'fit' method must be called before {action}.")
            raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)

    def fit(self,
            data,
            target_column):
        """
        DESCRIPTION:
            Function triggers the AutoML run. It is designed to handle both
            regression and classification tasks depending on the specified "task_type".

        PARAMETERS:
            data:
                Required Argument.
                Specifies the input teradataml DataFrame.
                Types: teradataml Dataframe

            target_column:
                Required Argument.
                Specifies target column of dataset.
                Types: str or ColumnExpression

        RETURNS:
            None

        RAISES:
            TeradataMlException, TypeError, ValueError

        EXAMPLES:
            # Create an instance of the AutoML called "automl_obj"
            # by referring "AutoML() or AutoRegressor() or AutoClassifier()" method.
            # Perform fit() operation on the "automl_obj".

            # Example 1: Passing column expression for target column.
            >>> automl_obj.fit(data = housing_train, target_column = housing_train.price)

            # Example 2: Passing name of target column.
            >>> automl_obj.fit(data = housing_train, target_column = "price")
        """
        # Checking if target column is of type ColumnExpression
        if isinstance(target_column, ColumnExpression):
            target_column = target_column.name

        # Appending fit arguments to list for validation
        arg_info_fit_matrix = [
            ["data", data, False, (DataFrame), True],
            ["target_column", target_column, False, (str), True],
        ]

        # Validate argument types
        _Validators._validate_function_arguments(arg_info_fit_matrix)

        # The instance is reported as fitted only after the run below completes;
        # otherwise a failed run would let predict()/leaderboard()/leader() proceed
        # and fail on attributes that were never created.
        self._is_fit_called = False

        # Initializing class variables
        self.data = data
        self.target_column = target_column

        # Converting include/exclude to list if passed as string
        if self.include_model:
            self.include_model = UtilFuncs._as_list(self.include_model)
        if self.exclude_model:
            self.exclude_model = UtilFuncs._as_list(self.exclude_model)
        # Updating model list based on include/exclude list
        self.model_list = self._resolve_model_list(self.model_list,
                                                   self.include_model,
                                                   self.exclude_model)

        # Checking if target column is present in data
        _Validators._validate_dataframe_has_argument_columns(self.target_column, "target_column",
                                                             self.data, "df")

        # Handling default task type
        if self.task_type.casefold() == "default":
            # if target column is having distinct values less than or equal to 20,
            # then it will be mapped to classification problem else regression problem
            if self.data.drop_duplicate(self.target_column).size <= 20:
                print("\nTask type is set to Classification as target column "
                      "is having distinct values less than or equal to 20.")
                self.task_type = "Classification"
            else:
                print("\nTask type is set to Regression as target column is "
                      "having distinct values greater than 20.")
                self.task_type = "Regression"

        is_classification = self.is_classification_type()

        # The stopping metric has to belong to the resolved task type.
        if self.stopping_metric is not None:
            if is_classification:
                permitted_values = list(self._CLASSIFICATION_METRICS)
            else:
                permitted_values = list(self._REGRESSION_METRICS)
            _Validators._validate_permitted_values(self.stopping_metric, permitted_values,
                                                   "stopping_metric")

        # A non-classification run needs a numeric target column.
        if not is_classification:
            _Validators._validate_column_type(self.data, self.target_column, 'target_column',
                                              expected_types=UtilFuncs()._get_numeric_datatypes())

        # Displaying received custom input
        if self.custom_data:
            print("\n Received below input for customization : ")
            print(json.dumps(self.custom_data, indent=4))

        # Classification problem
        task_cls = _Classification
        cls_method = "_classification"

        # Regression problem
        if self.task_type.casefold() == "regression":
            task_cls = _Regression
            cls_method = "_regression"

        # Running AutoML
        clf = task_cls(self.data, self.target_column, self.custom_data)

        self.model_info, self.leader_board, self.target_count, self.target_label, \
            self.data_transformation_params, self.table_name_mapping = getattr(clf, cls_method)(
                model_list = self.model_list,
                auto = self.auto,
                verbose = self.verbose,
                max_runtime_secs = self.max_runtime_secs,
                stopping_metric = self.stopping_metric,
                stopping_tolerance = self.stopping_tolerance
            )
        # Model Evaluation Phase
        self.m_evaluator = _ModelEvaluator(self.model_info,
                                           self.target_column,
                                           self.task_type)

        # Every attribute read by predict(), leaderboard() and leader() exists now.
        self._is_fit_called = True

    def predict(self,
                data = None,
                rank = 1):
        """
        DESCRIPTION:
            Function generates prediction on either default test data or any other data
            using model rank in leaderboard and displays performance metrics
            of the specified model.

            If test data contains target column, then it displays both prediction
            and performance metrics, otherwise displays only prediction.

        PARAMETERS:
            data:
                Optional Argument.
                Specifies the dataset on which prediction and performance
                metrics need to be generated using model rank in leaderboard.
                When "data" is not specified default test data is used. Default
                test data is the dataset generated at the time of training.
                Types: teradataml DataFrame

            rank:
                Optional Argument.
                Specifies the rank of the model in the leaderboard to be used for prediction.
                Must be greater than or equal to 1.
                Default Value: 1
                Types: int

        RETURNS:
            teradataml DataFrame with predictions.

        RAISES:
            TeradataMlException, TypeError, ValueError

        EXAMPLES:
            # Create an instance of the AutoML called "automl_obj"
            # by referring "AutoML() or AutoRegressor() or AutoClassifier()" method.
            # Perform fit() operation on the "automl_obj".
            # Perform predict() operation on the "automl_obj".

            # Example 1: Run predict with best performing model.
            >>> prediction = automl_obj.predict()
            >>> prediction

            # Example 2: Run predict with second best performing model.
            >>> prediction = automl_obj.predict(rank=2)
            >>> prediction

            # Example 3: Run predict for new test data with best performing model.
            >>> prediction = automl_obj.predict(admissions_test)
            >>> prediction

            # Example 4: Run predict for new test data with second best performing model.
            >>> prediction = automl_obj.predict(admissions_test, rank=2)
            >>> prediction
        """
        self._ensure_fit_called("predict", "running predict")

        # Appending predict arguments to list for validation.
        arg_info_pred_matrix = [
            ["data", data, True, (DataFrame), True],
            ["rank", rank, True, (int), True],
        ]

        # Validate argument types
        _Validators._validate_function_arguments(arg_info_pred_matrix)

        # Ranks are 1-based; anything lower would turn into a negative index below.
        if rank < 1:
            raise ValueError("Argument 'rank' must be greater than or equal to 1.")

        # Setting test data indicator to default value, i.e., False.
        self.test_data_ind = False
        # Setting target column indicator to default value, i.e., False.
        self.target_column_ind = False
        # Model Evaluation using rank-1 [rank starts from 0 in leaderboard]
        model_index = rank - 1

        # Checking if there is test data provided or not.
        # If no, then model will generate prediction on default test data.
        # If yes, then at first data transformation will happen then prediction will be generated.
        if data is None:
            metrics, pred = self.m_evaluator.model_evaluation(
                rank = model_index,
                table_name_mapping = self.table_name_mapping)
        else:
            # Setting test data indicator to True
            self.test_data_ind = True
            # Setting indicator to True if target column exists
            if self.target_column in data.columns:
                self.target_column_ind = True

            # Data Transformation Phase
            data_transform_instance = _DataTransformation(
                data = data,
                data_transformation_params = self.data_transformation_params,
                auto = self.auto,
                verbose = self.verbose,
                target_column_ind = self.target_column_ind,
                table_name_mapping = self.table_name_mapping)

            self.table_name_mapping = data_transform_instance.data_transformation()

            # Checking for target column presence in passed test data.
            # If present, then both prediction and evaluation metrics will be generated.
            # If not present, then only prediction will be generated.
            if self.target_column_ind:
                metrics, pred = self.m_evaluator.model_evaluation(
                    rank = model_index,
                    test_data_ind = self.test_data_ind,
                    target_column_ind = self.target_column_ind,
                    table_name_mapping = self.table_name_mapping)
            else:
                pred = self.m_evaluator.model_evaluation(
                    rank = model_index,
                    test_data_ind = self.test_data_ind,
                    table_name_mapping = self.table_name_mapping)

        # Checking if problem type is classification and target label is present.
        if self.is_classification_type() and self.target_label is not None:
            # Displaying target column labels
            tar_dct = {}
            print('Target Column Mapping:')
            # Iterating rows
            for row in self.target_label.result.itertuples():
                # Retrieving the category names of encoded target column
                # row[1] contains the original name of category
                # row[2] contains the encoded value
                if row[1] != 'TD_CATEGORY_COUNT':
                    tar_dct[row[1]] = row[2]

            for key, value in tar_dct.items():
                print(f"{key}: {value}")

        print("\n Prediction : ")
        print(pred.result)

        # Showing performance metrics if there is no test data
        # Or if target column is present in test data.
        if not self.test_data_ind or self.target_column_ind:
            print("\n Performance Metrics : ")
            print(metrics.result)

            prediction_column = 'prediction' if 'prediction' in pred.result.columns else 'Prediction'

            # Displaying confusion matrix and ROC-AUC for classification problem
            if self.is_classification_type():

                def print_data(output):
                    # NOTE(review): "display" is not imported in this module; it is
                    # presumably the builtin provided by IPython/Jupyter sessions,
                    # which is the only case in which this branch is taken.
                    if _is_terminal():
                        print(output)
                    else:
                        display(output)

                # Displaying ROC-AUC for binary classification
                if self.target_count == 2:
                    fit_params = {
                        "probability_column" : prediction_column,
                        "observation_column" : self.target_column,
                        "positive_class" : "1",
                        "data" : pred.result
                    }
                    # Fitting ROC
                    roc_out = ROC(**fit_params)
                    print("\n ROC-AUC : ")
                    print_data(roc_out.result)
                    print_data(roc_out.output_data)

                # Displaying confusion matrix for binary and multiclass classification
                prediction_df = pred.result.to_pandas()
                target_col = self.target_column
                print("\n Confusion Matrix : ")
                print_data(confusion_matrix(prediction_df[target_col],
                                            prediction_df[prediction_column]))

        # Returning prediction
        return pred.result

    def leaderboard(self):
        """
        DESCRIPTION:
            Function displays leaderboard.

        RETURNS:
            Pandas DataFrame with Leaderboard information.

        RAISES:
            TeradataMlException.

        EXAMPLES:
            # Create an instance of the AutoML called "automl_obj"
            # by referring "AutoML() or AutoRegressor() or AutoClassifier()" method.
            # Perform fit() operation on the "automl_obj".
            # Generate leaderboard using leaderboard() method on "automl_obj".
            >>> automl_obj.leaderboard()
        """
        self._ensure_fit_called("leaderboard", "generating leaderboard")
        return self.leader_board

    def leader(self):
        """
        DESCRIPTION:
            Function displays best performing model.

        RETURNS:
            None

        RAISES:
            TeradataMlException.

        EXAMPLES:
            # Create an instance of the AutoML called "automl_obj"
            # by referring "AutoML() or AutoRegressor() or AutoClassifier()" method.
            # Perform fit() operation on the "automl_obj".
            # Generate leaderboard using leaderboard() method on "automl_obj".
            # Display best performing model using leader() method on "automl_obj".
            >>> automl_obj.leader()
        """
        self._ensure_fit_called("leader", "generating leader")
        record = self.leader_board
        best_model = record[record['Rank'] == 1]
        # NOTE(review): "display" is not imported in this module; it is presumably
        # the builtin provided by IPython/Jupyter sessions.
        if not _is_terminal():
            display(best_model)
        else:
            print(best_model)

    @staticmethod
    def generate_custom_config(file_name = "custom"):
        """
        DESCRIPTION:
            Function generates custom JSON file containing user customized input under current
            working directory which can be used for AutoML execution.

        PARAMETERS:
            file_name:
                Optional Argument.
                Specifies the name of the file to be generated. Do not pass the file name
                with extension. Extension '.json' is automatically added to specified file name.
                Default Value: "custom"
                Types: str

        RETURNS:
            None

        EXAMPLES:
            # Import either of AutoML or AutoClassifier or AutoRegressor from teradataml.
            # As per requirement, generate json file using generate_custom_config() method.

            # Generate a default file named "custom.json" file using either of below options.
            >>> AutoML.generate_custom_config()
            or
            >>> AutoClassifier.generate_custom_config()
            or
            >>> AutoRegressor.generate_custom_config()
            # The above code will generate "custom.json" file under the current working directory.

            # Generate different file name using "file_name" argument.
            >>> AutoML.generate_custom_config("titanic_custom")
            or
            >>> AutoClassifier.generate_custom_config("titanic_custom")
            or
            >>> AutoRegressor.generate_custom_config("housing_custom")
            # The above code will generate "titanic_custom.json" or "housing_custom.json"
            # file under the current working directory.

        """
        # Initializing class
        generator = _GenerateCustomJson()
        # Generating custom JSON data
        data = generator._generate_custom_json()
        # Converting to JSON
        custom_json = json.dumps(data, indent=4)
        # Save JSON data to the specified file
        json_file = f"{file_name}.json"
        with open(json_file, 'w') as file:
            file.write(custom_json)
        print(f"\n'{json_file}' file is generated successfully under the current working directory.")
794
+
795
+
796
+ class _Regression(_FeatureExplore, _FeatureEngineering, _DataPreparation, _ModelTraining):
797
+
798
def __init__(self, data, target_column, custom_data = None):
    """
    DESCRIPTION:
        Function stores the input data, the target column name and the
        optional customization input on the instance for Regression.

    PARAMETERS:
        data:
            Required Argument.
            Specifies the input teradataml Dataframe.
            Types: teradataml Dataframe

        target_column:
            Required Argument.
            Specifies the name of the target column in "data".
            Types: str

        custom_data:
            Optional Argument.
            Specifies json object containing user customized input.
            Types: json object
    """
    # Only the inputs are recorded here; nothing else is initialized.
    self.data, self.target_column, self.custom_data = data, target_column, custom_data
825
+
826
+
827
+ def _regression(self,
828
+ model_list = None,
829
+ auto = False,
830
+ verbose = 0,
831
+ max_runtime_secs = None,
832
+ stopping_metric = None,
833
+ stopping_tolerance = None):
834
+ """
835
+ DESCRIPTION:
836
+ Interal Function runs Regression.
837
+
838
+ PARAMETERS:
839
+ auto:
840
+ Optional Arugment.
841
+ Specifies whether to run AutoML in custom mode or auto mode.
842
+ When set to False, runs in custom mode. Otherwise, by default runs in auto mode.
843
+ Types: bool
844
+
845
+ verbose:
846
+ Optional Argument.
847
+ Specifies the detailed execution steps based on verbose level.
848
+ Default Value: 0
849
+ Permitted Values:
850
+ * 0: prints the progress bar and leaderboard
851
+ * 1: prints the execution steps of AutoML.
852
+ * 2: prints the intermediate data between the execution of each step of AutoML.
853
+ Types: int
854
+
855
+ max_runtime_secs:
856
+ Optional Arugment.
857
+ Specifies the time limit in seconds for model training.
858
+ Types: int
859
+
860
+ stopping_metric:
861
+ Required, when "stopping_tolerance" is set, otherwise optional.
862
+ Specifies the stopping mertics for stopping tolerance in model training.
863
+ Types: str
864
+
865
+ stopping_tolerance:
866
+ Required, when "stopping_metric" is set, otherwise optional.
867
+ Specifies the stopping tolerance for stopping metrics in model training.
868
+ Types: float
869
+
870
+ RETURNS:
871
+ a tuple containing, model information and leaderboard.
872
+ """
873
+ # Feature Exploration Phase
874
+ _FeatureExplore.__init__(self,
875
+ data = self.data,
876
+ target_column = self.target_column,
877
+ verbose=verbose)
878
+ if verbose > 0:
879
+ self._exploration()
880
+ # Feature Engineering Phase
881
+ _FeatureEngineering.__init__(self,
882
+ data = self.data,
883
+ target_column = self.target_column,
884
+ model_list = model_list,
885
+ verbose = verbose,
886
+ custom_data = self.custom_data)
887
+ # Start time
888
+ start_time = time.time()
889
+ data, excluded_columns, target_label, data_transformation_params = self.feature_engineering(auto)
890
+
891
+ # Data preparation Phase
892
+ _DataPreparation.__init__(self,
893
+ data = self.data,
894
+ target_column = self.target_column,
895
+ verbose = verbose,
896
+ excluded_columns = excluded_columns,
897
+ custom_data = self.custom_data,
898
+ data_transform_dict = data_transformation_params)
899
+ features, data_transformation_params = self.data_preparation(auto)
900
+
901
+ # Calculating max_runtime_secs for model training by,
902
+ # subtracting the time taken for feature engineering and data preparation
903
+ max_runtime_secs = max_runtime_secs - (time.time() - start_time) \
904
+ if max_runtime_secs is not None else None
905
+
906
+ # Setting max_runtime_secs to 60 seconds if it is less than 0
907
+ max_runtime_secs = 60 if max_runtime_secs is not None and \
908
+ max_runtime_secs < 0 else max_runtime_secs
909
+
910
+ # Model Training
911
+ _ModelTraining.__init__(self,
912
+ data = self.data,
913
+ target_column = self.target_column,
914
+ model_list = model_list,
915
+ verbose = verbose,
916
+ features = features,
917
+ task_type = "Regression",
918
+ custom_data = self.custom_data)
919
+ models_info, leaderboard, target_count = self.model_training(auto = auto,
920
+ max_runtime_secs = max_runtime_secs,
921
+ stopping_metric = stopping_metric,
922
+ stopping_tolerance = stopping_tolerance)
923
+
924
+ return (models_info, leaderboard, target_count, target_label, data_transformation_params, self.table_name_mapping)
925
+
926
class _Classification(_FeatureExplore, _FeatureEngineering, _DataPreparation, _ModelTraining):

    def __init__(self,
                 data,
                 target_column,
                 custom_data = None):
        """
        DESCRIPTION:
            Function initializes the data, target column for Classification.

        PARAMETERS:
            data:
                Required Argument.
                Specifies the input teradataml DataFrame.
                Types: teradataml DataFrame

            target_column:
                Required Argument.
                Specifies the name of the target column in "data".
                Types: str

            custom_data:
                Optional Argument.
                Specifies json object containing user customized input.
                Default Value: None
                Types: json object
        """
        self.data = data
        self.target_column = target_column
        self.custom_data = custom_data

    def _classification(self,
                        model_list = None,
                        auto = False,
                        verbose = 0,
                        max_runtime_secs = None,
                        stopping_metric = None,
                        stopping_tolerance = None):
        """
        DESCRIPTION:
            Internal function runs Classification. It initializes and
            executes, in order, the feature exploration, feature engineering,
            data preparation and model training phases.

        PARAMETERS:
            model_list:
                Optional Argument.
                Specifies the model list that is forwarded unchanged to the
                feature engineering and model training phases.
                Default Value: None

            auto:
                Optional Argument.
                Specifies whether to run AutoML in custom mode or auto mode.
                When set to False, runs in custom mode. When set to True,
                runs in auto mode.
                Default Value: False
                Types: bool

            verbose:
                Optional Argument.
                Specifies the detailed execution steps based on verbose level.
                Feature exploration is executed only when the value is
                greater than 0.
                Default Value: 0
                Permitted Values:
                    * 0: prints the progress bar and leaderboard
                    * 1: prints the execution steps of AutoML.
                    * 2: prints the intermediate data between the execution of each step of AutoML.
                Types: int

            max_runtime_secs:
                Optional Argument.
                Specifies the time limit in seconds. The time consumed by
                feature engineering and data preparation is deducted from it
                and the remainder is given to model training. When the
                remainder is negative, model training gets 60 seconds.
                Default Value: None
                Types: int

            stopping_metric:
                Required, when "stopping_tolerance" is set, otherwise optional.
                Specifies the stopping metrics for stopping tolerance in model training.
                Types: str

            stopping_tolerance:
                Required, when "stopping_metric" is set, otherwise optional.
                Specifies the stopping tolerance for stopping metrics in model training.
                Types: float

        RETURNS:
            a tuple of six elements, in this order: model information,
            leaderboard, target count, target label, data transformation
            parameters and the table name mapping held on the instance.
        """
        # Feature Exploration Phase
        _FeatureExplore.__init__(self,
                                 data = self.data,
                                 target_column = self.target_column,
                                 verbose=verbose)
        if verbose > 0:
            self._exploration()

        # Feature Engineering Phase
        _FeatureEngineering.__init__(self,
                                     data = self.data,
                                     target_column = self.target_column,
                                     model_list = model_list,
                                     verbose = verbose,
                                     task_type = "Classification",
                                     custom_data = self.custom_data)
        # Start time; everything from here up to model training is charged
        # against "max_runtime_secs".
        start_time = time.time()
        # The first element of the result is not needed by the later phases.
        _, excluded_columns, target_label, data_transformation_params = self.feature_engineering(auto)

        # Data Preparation Phase
        _DataPreparation.__init__(self,
                                  data = self.data,
                                  target_column = self.target_column,
                                  verbose = verbose,
                                  excluded_columns = excluded_columns,
                                  custom_data = self.custom_data,
                                  data_transform_dict = data_transformation_params,
                                  task_type = "Classification")
        features, data_transformation_params = self.data_preparation(auto)

        # Calculating max_runtime_secs for model training by subtracting the
        # time taken for feature engineering and data preparation.
        if max_runtime_secs is not None:
            max_runtime_secs = max_runtime_secs - (time.time() - start_time)
            # Setting max_runtime_secs to 60 seconds if it is less than 0
            if max_runtime_secs < 0:
                max_runtime_secs = 60

        # Model training
        _ModelTraining.__init__(self,
                                data = self.data,
                                target_column = self.target_column,
                                model_list = model_list,
                                verbose = verbose,
                                features = features,
                                task_type = "Classification",
                                custom_data = self.custom_data)
        models_info, leaderboard, target_count = self.model_training(auto = auto,
                                                                     max_runtime_secs = max_runtime_secs,
                                                                     stopping_metric = stopping_metric,
                                                                     stopping_tolerance = stopping_tolerance)

        return (models_info, leaderboard, target_count, target_label, data_transformation_params, self.table_name_mapping)

    def _target_column_details(self):
        """
        DESCRIPTION:
            Internal function displays the distribution of the target
            column/response column as a count plot. Nothing is displayed when
            the visualization libraries are unavailable or when running in a
            terminal.
        """
        # If data visualization libraries are available
        if self._check_visualization_libraries() and not _is_terminal():
            import matplotlib.pyplot as plt
            import seaborn as sns
            self._display_msg(msg='\nTarget Column Distribution:',
                              show_data=True)
            plt.figure(figsize=(6, 6))
            # Plotting a count plot for target column; only the target
            # column is converted to pandas.
            sns.countplot(data=self.data.select([self.target_column]).to_pandas(), x=self.target_column)
            plt.show()

    def _check_data_imbalance(self,
                              data=None):
        """
        DESCRIPTION:
            Internal function calculates and checks the imbalance in dataset.
            The dataset is reported as imbalanced when the ratio of the
            smallest class count to the largest class count is below 0.4.

        PARAMETERS:
            data:
                Required Argument.
                Specifies the input DataFrame containing the target column.
                Although the signature defaults it to None, the function
                indexes into it, so a DataFrame must be passed.
                NOTE(review): ".value_counts().values" is used on the target
                column, which looks like the pandas API - confirm the caller
                passes a pandas DataFrame.

        RETURNS:
            bool, True if imbalance dataset detected, Otherwise False.
        """
        self._display_msg(msg="\nChecking imbalance data ...",
                          progress_bar=self.progress_bar)
        # Calculate the distribution of classes in the target column
        class_dist = data[self.target_column].value_counts().values

        # Find the minimum count of data points among the classes
        min_ct = np.min(class_dist)

        # Find the maximum count of data points among the classes
        max_ct = np.max(class_dist)

        # Calculate the imbalance ratio(minimum count to maximum count)
        imb_ratio = min_ct / max_ct

        # Check if the imbalance ratio less than the threshold of 0.4
        if imb_ratio < 0.4:
            self._display_msg(msg="Imbalance Found.",
                              progress_bar=self.progress_bar)
            return True

        self._display_msg(msg="Imbalance Not Found.",
                          progress_bar=self.progress_bar)
        return False

    def _set_custom_sampling(self):
        """
        DESCRIPTION:
            Function to handle customized data sampling for imbalance dataset.
            Reads "DataImbalanceIndicator" and "DataImbalanceMethod" from the
            custom data and, when the method is 'SMOTE' or 'NearMiss', stores
            it as the sampling method. In every other case the sampling
            method is left untouched and a message is displayed.
        """
        # Fetching user input for data sampling
        data_imbalance_input = self.custom_data.get("DataImbalanceIndicator", False)
        if data_imbalance_input:
            # Extracting method for performing data sampling
            handling_method = self.custom_data.get("DataImbalanceMethod", None)
            if handling_method in ('SMOTE', 'NearMiss'):
                self._data_sampling_method = handling_method
            else:
                self._display_msg(inline_msg="Provided method for data imbalance is not supported. AutoML will Proceed with default option.",
                                  progress_bar=self.progress_bar)
        else:
            self._display_msg(inline_msg="No information provided for performing customized imbalanced dataset sampling. AutoML will Proceed with default option.",
                              progress_bar=self.progress_bar)

    def _data_sampling(self,
                       data):
        """
        DESCRIPTION:
            Function to handle data imbalance in dataset using sampling techniques
            in case of classification. SMOTE is used when the configured
            sampling method is 'SMOTE', NearMiss (version 1) otherwise.

        PARAMETERS:
            data:
                Required Argument.
                Specifies the input DataFrame. It must contain the target
                column and an 'id' column; the 'id' column is regenerated
                for the balanced output.
                Types: pandas DataFrame

        RETURNS:
            The balanced DataFrame with a regenerated 'id' column. When the
            sampler fails, the input "data" is returned unchanged.
            NOTE(review): the result is built with pandas operations
            (reset_index/merge/rename), so it appears to be a pandas
            DataFrame rather than a teradataml DataFrame - confirm.
        """
        self._display_msg(msg="\nStarting data imbalance handling ...",
                          progress_bar=self.progress_bar,
                          show_data=True)

        # Importing required libraries. These imports happen at call time and
        # sit outside the try block, so an ImportError is not swallowed.
        from imblearn.over_sampling import SMOTE
        from imblearn.under_sampling import NearMiss

        st = time.time()
        self._display_msg(msg=f"\nBalancing the data using {self._data_sampling_method}...",
                          progress_bar=self.progress_bar,
                          show_data=True)
        # Performing data sampling
        try:
            # Fetching the minimum target column label count and
            # accordingly setting the number of neighbors for the sampler
            min_label_count = min(data[self.target_column].value_counts())
            if self._data_sampling_method == 'SMOTE':
                n_neighbors = min(5, min_label_count - 1)
                sampling_method = SMOTE(k_neighbors=n_neighbors, random_state=5)
            else:
                n_neighbors = min(3, min_label_count)
                sampling_method = NearMiss(version=1, n_neighbors=n_neighbors)

            # Fitting on dataset
            xt, yt = sampling_method.fit_resample(data.drop(columns=[self.target_column], axis=1),
                                                  data[self.target_column])

            # Merging the balanced dataset with target column
            balanced_df = (xt.reset_index().merge(yt.reset_index(), on="index"))
            # Discarding the merge key and the old 'id', then rebuilding 'id'
            # from a fresh index.
            balanced_df.drop(columns=['index', 'id'], axis=1, inplace=True)
            balanced_df = balanced_df.reset_index().rename(columns={'index': 'id'})

            et = time.time()
            self._display_msg(msg=f"Handled imbalanced dataset using {self._data_sampling_method}: {et - st:.2f} sec",
                              progress_bar=self.progress_bar,
                              show_data=True)
        except Exception:
            # Best-effort step: any sampler failure falls back to the
            # original data. "Exception" is used instead of a bare except so
            # that KeyboardInterrupt and SystemExit still propagate.
            self._display_msg(msg=f"Balancing using {self._data_sampling_method} Failed!!",
                              progress_bar=self.progress_bar,
                              show_data=True)
            # Returning original data if the data sampler fails
            return data

        self._display_msg(msg="Completed data imbalance handling.",
                          progress_bar=self.progress_bar,
                          show_data=True)
        # Returning balanced dataframe
        return balanced_df
1199
+
1200
class AutoRegressor(AutoML):

    def __init__(self,
                 include = None,
                 exclude = None,
                 verbose=0,
                 max_runtime_secs=None,
                 stopping_metric=None,
                 stopping_tolerance=None,
                 custom_config_file=None
                 ):
        """
        DESCRIPTION:
            AutoRegressor is the regression specific flavour of AutoML. It
            stores the supplied options on the instance and initializes
            AutoML with the task type fixed to "Regression".

        PARAMETERS:
            include:
                Optional Argument.
                Specifies the model algorithms to be used for model training phase.
                By default, all 5 models are used for training for regression and binary
                classification problem, while only 3 models are used for multi-class.
                Default Value: None
                Permitted Values: "glm", "svm", "knn", "decision_forest", "xgboost"
                Types: str OR list of str

            exclude:
                Optional Argument.
                Specifies the model algorithms to be excluded from model training phase.
                No model is excluded by default.
                Default Value: None
                Permitted Values: "glm", "svm", "knn", "decision_forest", "xgboost"
                Types: str OR list of str

            verbose:
                Optional Argument.
                Specifies the detailed execution steps based on verbose level.
                Default Value: 0
                Permitted Values:
                    * 0: prints the progress bar and leaderboard
                    * 1: prints the execution steps of AutoML.
                    * 2: prints the intermediate data between the execution of each step of AutoML.
                Types: int

            max_runtime_secs:
                Optional Argument.
                Specifies the time limit in seconds for model training.
                Default Value: None
                Types: int

            stopping_metric:
                Required, when "stopping_tolerance" is set, otherwise optional.
                Specifies the stopping metrics for stopping tolerance in model training.
                Default Value: None
                Permitted Values:
                    * For task_type "Regression": "R2", "MAE", "MSE", "MSLE",
                                                  "RMSE", "RMSLE"
                    * For task_type "Classification": 'MICRO-F1','MACRO-F1',
                                                      'MICRO-RECALL','MACRO-RECALL',
                                                      'MICRO-PRECISION', 'MACRO-PRECISION',
                                                      'WEIGHTED-PRECISION','WEIGHTED-RECALL',
                                                      'WEIGHTED-F1', 'ACCURACY'
                Types: str

            stopping_tolerance:
                Required, when "stopping_metric" is set, otherwise optional.
                Specifies the stopping tolerance for stopping metrics in model training.
                Default Value: None
                Types: float

            custom_config_file:
                Optional Argument.
                Specifies the path of JSON file in case of custom run.
                Default Value: None
                Types: str

        RETURNS:
            Instance of AutoRegressor.

        RAISES:
            TeradataMlException, TypeError, ValueError

        EXAMPLES:
            # Notes:
            #     1. Get the connection to Vantage to execute the function.
            #     2. One must import the required functions mentioned in
            #        the example from teradataml.
            #     3. Function will raise error if not supported on the Vantage
            #        user is connected to.

            # Load the example data.
            >>> load_example_data("decisionforestpredict", ["housing_train", "housing_test"])

            # Create teradataml DataFrame objects.
            >>> housing_train = DataFrame.from_table("housing_train")
            >>> housing_test = DataFrame.from_table("housing_test")

            # Example 1 : Run AutoRegressor using default options.
            # Scenario : Predict the price of house based on different factors.

            # Create instance of AutoRegressor.
            >>> automl_obj = AutoRegressor()

            # Fit the data.
            >>> automl_obj.fit(housing_train, "price")

            # Predict using best performing model.
            >>> prediction = automl_obj.predict()
            >>> prediction

            # Run predict for new test data with best performing model.
            >>> prediction = automl_obj.predict(housing_test)
            >>> prediction

            # Run predict for new test data with second best performing model.
            >>> prediction = automl_obj.predict(housing_test, rank=2)
            >>> prediction

            # Display leaderboard.
            >>> automl_obj.leaderboard()

            # Display best performing model.
            >>> automl_obj.leader()

            # Example 2 : Run AutoRegressor for regression problem with early stopping metric and tolerance.
            # Scenario : Predict the price of house based on different factors.
            #            Use custom configuration file to customize different
            #            processes of AutoML Run. Define performance threshold
            #            to acquire for the available models, and terminate training
            #            upon meeting the stipulated performance criteria.

            # Generate custom configuration file.
            >>> AutoRegressor.generate_custom_config("custom_housing")

            # Create instance of AutoRegressor.
            >>> automl_obj = AutoRegressor(verbose=2,
            ...                            exclude="xgboost",
            ...                            stopping_metric="R2",
            ...                            stopping_tolerance=0.7,
            ...                            custom_config_file="custom_housing.json")

            # Fit the data.
            >>> automl_obj.fit(housing_train, "price")

            # Run predict with best performing model.
            >>> prediction = automl_obj.predict()
            >>> prediction

            # Display leaderboard.
            >>> automl_obj.leaderboard()

            # Example 3 : Run AutoRegressor for regression problem with maximum runtime.
            # Scenario : Predict the price of house based on different factors.
            #            Run AutoML to get the best performing model in specified time.

            # Create instance of AutoRegressor.
            >>> automl_obj = AutoRegressor(verbose=2,
            ...                            exclude="xgboost",
            ...                            max_runtime_secs=500)

            # Fit the data.
            >>> automl_obj.fit(housing_train, "price")

            # Run predict with best performing model.
            >>> prediction = automl_obj.predict()
            >>> prediction

            # Run predict with second best performing model.
            >>> prediction = automl_obj.predict(rank=2)
            >>> prediction

            # Display leaderboard.
            >>> automl_obj.leaderboard()

            # Display best performing model.
            >>> automl_obj.leader()
        """
        # The task type is fixed for this class; it is not a user option.
        self.task_type = "Regression"

        # Model selection options.
        self.include = include
        self.exclude = exclude

        # Execution and early stopping options.
        self.verbose = verbose
        self.max_runtime_secs = max_runtime_secs
        self.stopping_metric = stopping_metric
        self.stopping_tolerance = stopping_tolerance
        self.custom_config_file = custom_config_file

        # Hand the stored options over to the AutoML initializer.
        super().__init__(
            task_type=self.task_type,
            include=self.include,
            exclude=self.exclude,
            verbose=self.verbose,
            max_runtime_secs=self.max_runtime_secs,
            stopping_metric=self.stopping_metric,
            stopping_tolerance=self.stopping_tolerance,
            custom_config_file=self.custom_config_file,
        )
1384
class AutoClassifier(AutoML):

    def __init__(self,
                 include = None,
                 exclude = None,
                 verbose=0,
                 max_runtime_secs=None,
                 stopping_metric=None,
                 stopping_tolerance=None,
                 custom_config_file=None
                 ):
        """
        DESCRIPTION:
            AutoClassifier is the classification specific flavour of AutoML.
            It stores the supplied options on the instance and initializes
            AutoML with the task type fixed to "Classification".

        PARAMETERS:
            include:
                Optional Argument.
                Specifies the model algorithms to be used for model training phase.
                By default, all 5 models are used for training for regression and binary
                classification problem, while only 3 models are used for multi-class.
                Default Value: None
                Permitted Values: "glm", "svm", "knn", "decision_forest", "xgboost"
                Types: str OR list of str

            exclude:
                Optional Argument.
                Specifies the model algorithms to be excluded from model training phase.
                No model is excluded by default.
                Default Value: None
                Permitted Values: "glm", "svm", "knn", "decision_forest", "xgboost"
                Types: str OR list of str

            verbose:
                Optional Argument.
                Specifies the detailed execution steps based on verbose level.
                Default Value: 0
                Permitted Values:
                    * 0: prints the progress bar and leaderboard
                    * 1: prints the execution steps of AutoML.
                    * 2: prints the intermediate data between the execution of each step of AutoML.
                Types: int

            max_runtime_secs:
                Optional Argument.
                Specifies the time limit in seconds for model training.
                Default Value: None
                Types: int

            stopping_metric:
                Required, when "stopping_tolerance" is set, otherwise optional.
                Specifies the stopping metrics for stopping tolerance in model training.
                Default Value: None
                Permitted Values:
                    * For task_type "Regression": "R2", "MAE", "MSE", "MSLE",
                                                  "RMSE", "RMSLE"
                    * For task_type "Classification": 'MICRO-F1','MACRO-F1',
                                                      'MICRO-RECALL','MACRO-RECALL',
                                                      'MICRO-PRECISION', 'MACRO-PRECISION',
                                                      'WEIGHTED-PRECISION','WEIGHTED-RECALL',
                                                      'WEIGHTED-F1', 'ACCURACY'
                Types: str

            stopping_tolerance:
                Required, when "stopping_metric" is set, otherwise optional.
                Specifies the stopping tolerance for stopping metrics in model training.
                Default Value: None
                Types: float

            custom_config_file:
                Optional Argument.
                Specifies the path of json file in case of custom run.
                Default Value: None
                Types: str

        RETURNS:
            Instance of AutoClassifier.

        RAISES:
            TeradataMlException, TypeError, ValueError

        EXAMPLES:
            # Notes:
            #     1. Get the connection to Vantage to execute the function.
            #     2. One must import the required functions mentioned in
            #        the example from teradataml.
            #     3. Function will raise error if not supported on the Vantage
            #        user is connected to.

            # Load the example data.
            >>> load_example_data("teradataml", ["titanic", "iris_input"])
            >>> load_example_data("GLMPredict", ["admissions_test", "admissions_train"])

            # Create teradataml DataFrame objects.
            >>> admissions_train = DataFrame.from_table("admissions_train")
            >>> titanic = DataFrame.from_table("titanic")
            >>> iris_input = DataFrame.from_table("iris_input")
            >>> admissions_test = DataFrame.from_table("admissions_test")

            # Example 1 : Run AutoClassifier for binary classification problem
            # Scenario : Predict whether a student will be admitted to a university
            #            based on different factors. Run AutoML to get the best performing model
            #            out of available models.

            # Create instance of AutoClassifier.
            >>> automl_obj = AutoClassifier()

            # Fit the data.
            >>> automl_obj.fit(admissions_train, "admitted")

            # Predict using best performing model.
            >>> prediction = automl_obj.predict()
            >>> prediction

            # Run predict for new test data with best performing model.
            >>> prediction = automl_obj.predict(admissions_test)
            >>> prediction

            # Run predict for new test data with second best performing model.
            >>> prediction = automl_obj.predict(admissions_test, rank=2)
            >>> prediction

            # Display leaderboard.
            >>> automl_obj.leaderboard()

            # Display best performing model.
            >>> automl_obj.leader()

            # Example 2 : Run AutoClassifier for binary classification.
            # Scenario : Predict whether passenger aboard the RMS Titanic survived
            #            or not based on different factors. Run AutoML to get the
            #            best performing model out of available models. Use custom
            #            configuration file to customize different processes of
            #            AutoML Run.

            # Generate custom configuration file.
            >>> AutoClassifier.generate_custom_config("custom_titanic")

            # Create instance of AutoClassifier.
            >>> automl_obj = AutoClassifier(verbose=2,
            ...                             custom_config_file="custom_titanic.json")

            # Fit the data.
            >>> automl_obj.fit(titanic, titanic.survived)

            # Run predict with best performing model.
            >>> prediction = automl_obj.predict()
            >>> prediction

            # Run predict with second best performing model.
            >>> prediction = automl_obj.predict(rank=2)
            >>> prediction

            # Display leaderboard.
            >>> automl_obj.leaderboard()

            # Display best performing model.
            >>> automl_obj.leader()

            # Example 3 : Run AutoClassifier for multiclass classification problem.
            # Scenario : Predict the species of iris flower based on different factors.
            #            Run AutoML to get the best performing model out of available
            #            models. Use custom configuration file to customize different
            #            processes of AutoML Run.

            # Generate custom configuration file.
            >>> AutoClassifier.generate_custom_config("custom_iris")

            # Create instance of AutoClassifier.
            >>> automl_obj = AutoClassifier(verbose=1,
            ...                             custom_config_file="custom_iris.json")

            # Fit the data.
            >>> automl_obj.fit(iris_input, "species")

            # Predict using best performing model.
            >>> prediction = automl_obj.predict()
            >>> prediction

            # Display leaderboard.
            >>> automl_obj.leaderboard()

            # Display best performing model.
            >>> automl_obj.leader()

            # Example 4 : Run AutoClassifier for classification problem with stopping metric and tolerance.
            # Scenario : Predict whether passenger aboard the RMS Titanic survived
            #            or not based on different factors. Use custom configuration
            #            file to customize different processes of AutoML Run. Define
            #            performance threshold to acquire for the available models, and
            #            terminate training upon meeting the stipulated performance criteria.

            # Generate custom configuration file.
            >>> AutoClassifier.generate_custom_config("custom_titanic")

            # Create instance of AutoClassifier.
            >>> automl_obj = AutoClassifier(verbose=2,
            ...                             exclude="xgboost",
            ...                             stopping_metric="MICRO-F1",
            ...                             stopping_tolerance=0.7,
            ...                             custom_config_file="custom_titanic.json")

            # Fit the data.
            >>> automl_obj.fit(titanic, titanic.survived)

            # Run predict with best performing model.
            >>> prediction = automl_obj.predict()
            >>> prediction

            # Display leaderboard.
            >>> automl_obj.leaderboard()

            # Example 5 : Run AutoClassifier for classification problem with maximum runtime.
            # Scenario : Predict the species of iris flower based on different factors.
            #            Run AutoML to get the best performing model in specified time.

            # Create instance of AutoClassifier.
            >>> automl_obj = AutoClassifier(verbose=2,
            ...                             exclude="xgboost",
            ...                             max_runtime_secs=500)

            # Fit the data.
            >>> automl_obj.fit(iris_input, iris_input.species)

            # Run predict with best performing model.
            >>> prediction = automl_obj.predict()
            >>> prediction

            # Run predict with second best performing model.
            >>> prediction = automl_obj.predict(rank=2)
            >>> prediction

            # Display leaderboard.
            >>> automl_obj.leaderboard()

            # Display best performing model.
            >>> automl_obj.leader()
        """
        # The task type is fixed for this class; it is not a user option.
        self.task_type = "Classification"

        # Model selection options.
        self.include = include
        self.exclude = exclude

        # Execution and early stopping options.
        self.verbose = verbose
        self.max_runtime_secs = max_runtime_secs
        self.stopping_metric = stopping_metric
        self.stopping_tolerance = stopping_tolerance
        self.custom_config_file = custom_config_file

        # Hand the stored options over to the AutoML initializer.
        super().__init__(
            task_type=self.task_type,
            include=self.include,
            exclude=self.exclude,
            verbose=self.verbose,
            max_runtime_secs=self.max_runtime_secs,
            stopping_metric=self.stopping_metric,
            stopping_tolerance=self.stopping_tolerance,
            custom_config_file=self.custom_config_file,
        )