teradataml 17.20.0.6__py3-none-any.whl → 20.0.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of teradataml might be problematic.
Files changed (432)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +238 -1
  4. teradataml/__init__.py +13 -3
  5. teradataml/_version.py +1 -1
  6. teradataml/analytics/Transformations.py +4 -4
  7. teradataml/analytics/__init__.py +0 -2
  8. teradataml/analytics/analytic_function_executor.py +3 -0
  9. teradataml/analytics/json_parser/utils.py +13 -12
  10. teradataml/analytics/sqle/DecisionTreePredict.py +15 -30
  11. teradataml/analytics/sqle/NaiveBayesPredict.py +11 -20
  12. teradataml/analytics/sqle/__init__.py +0 -13
  13. teradataml/analytics/utils.py +1 -0
  14. teradataml/analytics/valib.py +3 -0
  15. teradataml/automl/__init__.py +1628 -0
  16. teradataml/automl/custom_json_utils.py +1270 -0
  17. teradataml/automl/data_preparation.py +993 -0
  18. teradataml/automl/data_transformation.py +727 -0
  19. teradataml/automl/feature_engineering.py +1648 -0
  20. teradataml/automl/feature_exploration.py +547 -0
  21. teradataml/automl/model_evaluation.py +163 -0
  22. teradataml/automl/model_training.py +887 -0
  23. teradataml/catalog/__init__.py +0 -2
  24. teradataml/catalog/byom.py +49 -6
  25. teradataml/catalog/function_argument_mapper.py +0 -2
  26. teradataml/catalog/model_cataloging_utils.py +2 -1021
  27. teradataml/common/aed_utils.py +6 -2
  28. teradataml/common/constants.py +50 -58
  29. teradataml/common/deprecations.py +160 -0
  30. teradataml/common/garbagecollector.py +61 -104
  31. teradataml/common/messagecodes.py +27 -36
  32. teradataml/common/messages.py +11 -15
  33. teradataml/common/utils.py +205 -287
  34. teradataml/common/wrapper_utils.py +1 -110
  35. teradataml/context/context.py +150 -78
  36. teradataml/data/bank_churn.csv +10001 -0
  37. teradataml/data/bmi.csv +501 -0
  38. teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +3 -3
  39. teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +6 -5
  40. teradataml/data/docs/sqle/docs_17_10/Fit.py +1 -1
  41. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +1 -1
  42. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +1 -1
  43. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +2 -2
  44. teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +2 -1
  45. teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +1 -0
  46. teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +1 -1
  47. teradataml/data/docs/sqle/docs_17_10/Transform.py +2 -1
  48. teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +3 -3
  49. teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +6 -5
  50. teradataml/data/docs/sqle/docs_17_20/Fit.py +1 -1
  51. teradataml/data/docs/sqle/docs_17_20/GLM.py +1 -1
  52. teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +9 -10
  53. teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +3 -2
  54. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +16 -15
  55. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +2 -2
  56. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +2 -2
  57. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +8 -8
  58. teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +21 -20
  59. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +1 -1
  60. teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +8 -3
  61. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +6 -5
  62. teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +6 -6
  63. teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +2 -1
  64. teradataml/data/docs/sqle/docs_17_20/SVM.py +1 -1
  65. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +16 -16
  66. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +1 -0
  67. teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +3 -2
  68. teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +4 -4
  69. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +19 -19
  70. teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +5 -4
  71. teradataml/data/docs/sqle/docs_17_20/Transform.py +2 -2
  72. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +9 -9
  73. teradataml/data/fish.csv +160 -0
  74. teradataml/data/glass_types.csv +215 -0
  75. teradataml/data/insurance.csv +1 -1
  76. teradataml/data/iris_data.csv +151 -0
  77. teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +1 -0
  78. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +1 -0
  79. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +1 -0
  80. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +1 -0
  81. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +1 -0
  82. teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +1 -0
  83. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +1 -0
  84. teradataml/data/load_example_data.py +3 -0
  85. teradataml/data/multi_model_classification.csv +401 -0
  86. teradataml/data/multi_model_regression.csv +401 -0
  87. teradataml/data/openml_example.json +63 -0
  88. teradataml/data/scripts/deploy_script.py +65 -0
  89. teradataml/data/scripts/mapper.R +20 -0
  90. teradataml/data/scripts/sklearn/__init__.py +0 -0
  91. teradataml/data/scripts/sklearn/sklearn_fit.py +175 -0
  92. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +135 -0
  93. teradataml/data/scripts/sklearn/sklearn_function.template +113 -0
  94. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +158 -0
  95. teradataml/data/scripts/sklearn/sklearn_neighbors.py +152 -0
  96. teradataml/data/scripts/sklearn/sklearn_score.py +128 -0
  97. teradataml/data/scripts/sklearn/sklearn_transform.py +179 -0
  98. teradataml/data/templates/open_source_ml.json +9 -0
  99. teradataml/data/teradataml_example.json +73 -1
  100. teradataml/data/test_classification.csv +101 -0
  101. teradataml/data/test_prediction.csv +101 -0
  102. teradataml/data/test_regression.csv +101 -0
  103. teradataml/data/train_multiclass.csv +101 -0
  104. teradataml/data/train_regression.csv +101 -0
  105. teradataml/data/train_regression_multiple_labels.csv +101 -0
  106. teradataml/data/wine_data.csv +1600 -0
  107. teradataml/dataframe/copy_to.py +79 -13
  108. teradataml/dataframe/data_transfer.py +8 -0
  109. teradataml/dataframe/dataframe.py +910 -311
  110. teradataml/dataframe/dataframe_utils.py +102 -5
  111. teradataml/dataframe/fastload.py +11 -3
  112. teradataml/dataframe/setop.py +15 -2
  113. teradataml/dataframe/sql.py +3735 -77
  114. teradataml/dataframe/sql_function_parameters.py +56 -5
  115. teradataml/dataframe/vantage_function_types.py +45 -1
  116. teradataml/dataframe/window.py +30 -29
  117. teradataml/dbutils/dbutils.py +18 -1
  118. teradataml/geospatial/geodataframe.py +18 -7
  119. teradataml/geospatial/geodataframecolumn.py +5 -0
  120. teradataml/hyperparameter_tuner/optimizer.py +910 -120
  121. teradataml/hyperparameter_tuner/utils.py +131 -37
  122. teradataml/lib/aed_0_1.dll +0 -0
  123. teradataml/lib/libaed_0_1.dylib +0 -0
  124. teradataml/lib/libaed_0_1.so +0 -0
  125. teradataml/libaed_0_1.dylib +0 -0
  126. teradataml/libaed_0_1.so +0 -0
  127. teradataml/opensource/__init__.py +1 -0
  128. teradataml/opensource/sklearn/__init__.py +1 -0
  129. teradataml/opensource/sklearn/_class.py +255 -0
  130. teradataml/opensource/sklearn/_sklearn_wrapper.py +1668 -0
  131. teradataml/opensource/sklearn/_wrapper_utils.py +268 -0
  132. teradataml/opensource/sklearn/constants.py +54 -0
  133. teradataml/options/__init__.py +3 -6
  134. teradataml/options/configure.py +21 -20
  135. teradataml/scriptmgmt/UserEnv.py +61 -5
  136. teradataml/scriptmgmt/lls_utils.py +135 -53
  137. teradataml/table_operators/Apply.py +38 -6
  138. teradataml/table_operators/Script.py +45 -308
  139. teradataml/table_operators/TableOperator.py +182 -591
  140. teradataml/table_operators/__init__.py +0 -1
  141. teradataml/table_operators/table_operator_util.py +32 -40
  142. teradataml/utils/validators.py +127 -3
  143. {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/METADATA +243 -3
  144. {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/RECORD +147 -391
  145. teradataml/analytics/mle/AdaBoost.py +0 -651
  146. teradataml/analytics/mle/AdaBoostPredict.py +0 -564
  147. teradataml/analytics/mle/Antiselect.py +0 -342
  148. teradataml/analytics/mle/Arima.py +0 -641
  149. teradataml/analytics/mle/ArimaPredict.py +0 -477
  150. teradataml/analytics/mle/Attribution.py +0 -1070
  151. teradataml/analytics/mle/Betweenness.py +0 -658
  152. teradataml/analytics/mle/Burst.py +0 -711
  153. teradataml/analytics/mle/CCM.py +0 -600
  154. teradataml/analytics/mle/CCMPrepare.py +0 -324
  155. teradataml/analytics/mle/CFilter.py +0 -460
  156. teradataml/analytics/mle/ChangePointDetection.py +0 -572
  157. teradataml/analytics/mle/ChangePointDetectionRT.py +0 -477
  158. teradataml/analytics/mle/Closeness.py +0 -737
  159. teradataml/analytics/mle/ConfusionMatrix.py +0 -420
  160. teradataml/analytics/mle/Correlation.py +0 -477
  161. teradataml/analytics/mle/Correlation2.py +0 -573
  162. teradataml/analytics/mle/CoxHazardRatio.py +0 -679
  163. teradataml/analytics/mle/CoxPH.py +0 -556
  164. teradataml/analytics/mle/CoxSurvival.py +0 -478
  165. teradataml/analytics/mle/CumulativeMovAvg.py +0 -363
  166. teradataml/analytics/mle/DTW.py +0 -623
  167. teradataml/analytics/mle/DWT.py +0 -564
  168. teradataml/analytics/mle/DWT2D.py +0 -599
  169. teradataml/analytics/mle/DecisionForest.py +0 -716
  170. teradataml/analytics/mle/DecisionForestEvaluator.py +0 -363
  171. teradataml/analytics/mle/DecisionForestPredict.py +0 -561
  172. teradataml/analytics/mle/DecisionTree.py +0 -830
  173. teradataml/analytics/mle/DecisionTreePredict.py +0 -528
  174. teradataml/analytics/mle/ExponentialMovAvg.py +0 -418
  175. teradataml/analytics/mle/FMeasure.py +0 -402
  176. teradataml/analytics/mle/FPGrowth.py +0 -734
  177. teradataml/analytics/mle/FrequentPaths.py +0 -695
  178. teradataml/analytics/mle/GLM.py +0 -558
  179. teradataml/analytics/mle/GLML1L2.py +0 -547
  180. teradataml/analytics/mle/GLML1L2Predict.py +0 -519
  181. teradataml/analytics/mle/GLMPredict.py +0 -529
  182. teradataml/analytics/mle/HMMDecoder.py +0 -945
  183. teradataml/analytics/mle/HMMEvaluator.py +0 -901
  184. teradataml/analytics/mle/HMMSupervised.py +0 -521
  185. teradataml/analytics/mle/HMMUnsupervised.py +0 -572
  186. teradataml/analytics/mle/Histogram.py +0 -561
  187. teradataml/analytics/mle/IDWT.py +0 -476
  188. teradataml/analytics/mle/IDWT2D.py +0 -493
  189. teradataml/analytics/mle/IdentityMatch.py +0 -763
  190. teradataml/analytics/mle/Interpolator.py +0 -918
  191. teradataml/analytics/mle/KMeans.py +0 -485
  192. teradataml/analytics/mle/KNN.py +0 -627
  193. teradataml/analytics/mle/KNNRecommender.py +0 -488
  194. teradataml/analytics/mle/KNNRecommenderPredict.py +0 -581
  195. teradataml/analytics/mle/LAR.py +0 -439
  196. teradataml/analytics/mle/LARPredict.py +0 -478
  197. teradataml/analytics/mle/LDA.py +0 -548
  198. teradataml/analytics/mle/LDAInference.py +0 -492
  199. teradataml/analytics/mle/LDATopicSummary.py +0 -464
  200. teradataml/analytics/mle/LevenshteinDistance.py +0 -450
  201. teradataml/analytics/mle/LinReg.py +0 -433
  202. teradataml/analytics/mle/LinRegPredict.py +0 -438
  203. teradataml/analytics/mle/MinHash.py +0 -544
  204. teradataml/analytics/mle/Modularity.py +0 -587
  205. teradataml/analytics/mle/NEREvaluator.py +0 -410
  206. teradataml/analytics/mle/NERExtractor.py +0 -595
  207. teradataml/analytics/mle/NERTrainer.py +0 -458
  208. teradataml/analytics/mle/NGrams.py +0 -570
  209. teradataml/analytics/mle/NPath.py +0 -634
  210. teradataml/analytics/mle/NTree.py +0 -549
  211. teradataml/analytics/mle/NaiveBayes.py +0 -462
  212. teradataml/analytics/mle/NaiveBayesPredict.py +0 -513
  213. teradataml/analytics/mle/NaiveBayesTextClassifier.py +0 -607
  214. teradataml/analytics/mle/NaiveBayesTextClassifier2.py +0 -531
  215. teradataml/analytics/mle/NaiveBayesTextClassifierPredict.py +0 -799
  216. teradataml/analytics/mle/NamedEntityFinder.py +0 -529
  217. teradataml/analytics/mle/NamedEntityFinderEvaluator.py +0 -414
  218. teradataml/analytics/mle/NamedEntityFinderTrainer.py +0 -396
  219. teradataml/analytics/mle/POSTagger.py +0 -417
  220. teradataml/analytics/mle/Pack.py +0 -411
  221. teradataml/analytics/mle/PageRank.py +0 -535
  222. teradataml/analytics/mle/PathAnalyzer.py +0 -426
  223. teradataml/analytics/mle/PathGenerator.py +0 -367
  224. teradataml/analytics/mle/PathStart.py +0 -464
  225. teradataml/analytics/mle/PathSummarizer.py +0 -470
  226. teradataml/analytics/mle/Pivot.py +0 -471
  227. teradataml/analytics/mle/ROC.py +0 -425
  228. teradataml/analytics/mle/RandomSample.py +0 -637
  229. teradataml/analytics/mle/RandomWalkSample.py +0 -490
  230. teradataml/analytics/mle/SAX.py +0 -779
  231. teradataml/analytics/mle/SVMDense.py +0 -677
  232. teradataml/analytics/mle/SVMDensePredict.py +0 -536
  233. teradataml/analytics/mle/SVMDenseSummary.py +0 -437
  234. teradataml/analytics/mle/SVMSparse.py +0 -557
  235. teradataml/analytics/mle/SVMSparsePredict.py +0 -553
  236. teradataml/analytics/mle/SVMSparseSummary.py +0 -435
  237. teradataml/analytics/mle/Sampling.py +0 -549
  238. teradataml/analytics/mle/Scale.py +0 -565
  239. teradataml/analytics/mle/ScaleByPartition.py +0 -496
  240. teradataml/analytics/mle/ScaleMap.py +0 -378
  241. teradataml/analytics/mle/ScaleSummary.py +0 -320
  242. teradataml/analytics/mle/SentenceExtractor.py +0 -363
  243. teradataml/analytics/mle/SentimentEvaluator.py +0 -432
  244. teradataml/analytics/mle/SentimentExtractor.py +0 -578
  245. teradataml/analytics/mle/SentimentTrainer.py +0 -405
  246. teradataml/analytics/mle/SeriesSplitter.py +0 -641
  247. teradataml/analytics/mle/Sessionize.py +0 -475
  248. teradataml/analytics/mle/SimpleMovAvg.py +0 -397
  249. teradataml/analytics/mle/StringSimilarity.py +0 -425
  250. teradataml/analytics/mle/TF.py +0 -389
  251. teradataml/analytics/mle/TFIDF.py +0 -504
  252. teradataml/analytics/mle/TextChunker.py +0 -414
  253. teradataml/analytics/mle/TextClassifier.py +0 -399
  254. teradataml/analytics/mle/TextClassifierEvaluator.py +0 -413
  255. teradataml/analytics/mle/TextClassifierTrainer.py +0 -565
  256. teradataml/analytics/mle/TextMorph.py +0 -494
  257. teradataml/analytics/mle/TextParser.py +0 -623
  258. teradataml/analytics/mle/TextTagger.py +0 -530
  259. teradataml/analytics/mle/TextTokenizer.py +0 -502
  260. teradataml/analytics/mle/UnivariateStatistics.py +0 -488
  261. teradataml/analytics/mle/Unpack.py +0 -526
  262. teradataml/analytics/mle/Unpivot.py +0 -438
  263. teradataml/analytics/mle/VarMax.py +0 -776
  264. teradataml/analytics/mle/VectorDistance.py +0 -762
  265. teradataml/analytics/mle/WeightedMovAvg.py +0 -400
  266. teradataml/analytics/mle/XGBoost.py +0 -842
  267. teradataml/analytics/mle/XGBoostPredict.py +0 -627
  268. teradataml/analytics/mle/__init__.py +0 -123
  269. teradataml/analytics/mle/json/adaboost_mle.json +0 -135
  270. teradataml/analytics/mle/json/adaboostpredict_mle.json +0 -85
  271. teradataml/analytics/mle/json/antiselect_mle.json +0 -34
  272. teradataml/analytics/mle/json/antiselect_mle_mle.json +0 -34
  273. teradataml/analytics/mle/json/arima_mle.json +0 -172
  274. teradataml/analytics/mle/json/arimapredict_mle.json +0 -52
  275. teradataml/analytics/mle/json/attribution_mle_mle.json +0 -143
  276. teradataml/analytics/mle/json/betweenness_mle.json +0 -97
  277. teradataml/analytics/mle/json/burst_mle.json +0 -140
  278. teradataml/analytics/mle/json/ccm_mle.json +0 -124
  279. teradataml/analytics/mle/json/ccmprepare_mle.json +0 -14
  280. teradataml/analytics/mle/json/cfilter_mle.json +0 -93
  281. teradataml/analytics/mle/json/changepointdetection_mle.json +0 -92
  282. teradataml/analytics/mle/json/changepointdetectionrt_mle.json +0 -78
  283. teradataml/analytics/mle/json/closeness_mle.json +0 -104
  284. teradataml/analytics/mle/json/confusionmatrix_mle.json +0 -79
  285. teradataml/analytics/mle/json/correlation_mle.json +0 -86
  286. teradataml/analytics/mle/json/correlationreduce_mle.json +0 -49
  287. teradataml/analytics/mle/json/coxhazardratio_mle.json +0 -89
  288. teradataml/analytics/mle/json/coxph_mle.json +0 -98
  289. teradataml/analytics/mle/json/coxsurvival_mle.json +0 -79
  290. teradataml/analytics/mle/json/cumulativemovavg_mle.json +0 -34
  291. teradataml/analytics/mle/json/decisionforest_mle.json +0 -167
  292. teradataml/analytics/mle/json/decisionforestevaluator_mle.json +0 -33
  293. teradataml/analytics/mle/json/decisionforestpredict_mle_mle.json +0 -74
  294. teradataml/analytics/mle/json/decisiontree_mle.json +0 -194
  295. teradataml/analytics/mle/json/decisiontreepredict_mle_mle.json +0 -86
  296. teradataml/analytics/mle/json/dtw_mle.json +0 -97
  297. teradataml/analytics/mle/json/dwt2d_mle.json +0 -116
  298. teradataml/analytics/mle/json/dwt_mle.json +0 -101
  299. teradataml/analytics/mle/json/exponentialmovavg_mle.json +0 -55
  300. teradataml/analytics/mle/json/fmeasure_mle.json +0 -58
  301. teradataml/analytics/mle/json/fpgrowth_mle.json +0 -159
  302. teradataml/analytics/mle/json/frequentpaths_mle.json +0 -129
  303. teradataml/analytics/mle/json/glm_mle.json +0 -111
  304. teradataml/analytics/mle/json/glml1l2_mle.json +0 -106
  305. teradataml/analytics/mle/json/glml1l2predict_mle.json +0 -57
  306. teradataml/analytics/mle/json/glmpredict_mle_mle.json +0 -74
  307. teradataml/analytics/mle/json/histogram_mle.json +0 -100
  308. teradataml/analytics/mle/json/hmmdecoder_mle.json +0 -192
  309. teradataml/analytics/mle/json/hmmevaluator_mle.json +0 -206
  310. teradataml/analytics/mle/json/hmmsupervised_mle.json +0 -91
  311. teradataml/analytics/mle/json/hmmunsupervised_mle.json +0 -114
  312. teradataml/analytics/mle/json/identitymatch_mle.json +0 -88
  313. teradataml/analytics/mle/json/idwt2d_mle.json +0 -73
  314. teradataml/analytics/mle/json/idwt_mle.json +0 -66
  315. teradataml/analytics/mle/json/interpolator_mle.json +0 -151
  316. teradataml/analytics/mle/json/kmeans_mle.json +0 -97
  317. teradataml/analytics/mle/json/knn_mle.json +0 -141
  318. teradataml/analytics/mle/json/knnrecommender_mle.json +0 -111
  319. teradataml/analytics/mle/json/knnrecommenderpredict_mle.json +0 -75
  320. teradataml/analytics/mle/json/lar_mle.json +0 -78
  321. teradataml/analytics/mle/json/larpredict_mle.json +0 -69
  322. teradataml/analytics/mle/json/lda_mle.json +0 -130
  323. teradataml/analytics/mle/json/ldainference_mle.json +0 -78
  324. teradataml/analytics/mle/json/ldatopicsummary_mle.json +0 -64
  325. teradataml/analytics/mle/json/levenshteindistance_mle.json +0 -92
  326. teradataml/analytics/mle/json/linreg_mle.json +0 -42
  327. teradataml/analytics/mle/json/linregpredict_mle.json +0 -56
  328. teradataml/analytics/mle/json/minhash_mle.json +0 -113
  329. teradataml/analytics/mle/json/modularity_mle.json +0 -91
  330. teradataml/analytics/mle/json/naivebayespredict_mle_mle.json +0 -85
  331. teradataml/analytics/mle/json/naivebayesreduce_mle.json +0 -52
  332. teradataml/analytics/mle/json/naivebayestextclassifierpredict_mle_mle.json +0 -147
  333. teradataml/analytics/mle/json/naivebayestextclassifiertrainer2_mle.json +0 -108
  334. teradataml/analytics/mle/json/naivebayestextclassifiertrainer_mle.json +0 -102
  335. teradataml/analytics/mle/json/namedentityfinder_mle.json +0 -84
  336. teradataml/analytics/mle/json/namedentityfinderevaluatorreduce_mle.json +0 -43
  337. teradataml/analytics/mle/json/namedentityfindertrainer_mle.json +0 -64
  338. teradataml/analytics/mle/json/nerevaluator_mle.json +0 -54
  339. teradataml/analytics/mle/json/nerextractor_mle.json +0 -87
  340. teradataml/analytics/mle/json/nertrainer_mle.json +0 -89
  341. teradataml/analytics/mle/json/ngrams_mle.json +0 -137
  342. teradataml/analytics/mle/json/ngramsplitter_mle_mle.json +0 -137
  343. teradataml/analytics/mle/json/npath@coprocessor_mle.json +0 -73
  344. teradataml/analytics/mle/json/ntree@coprocessor_mle.json +0 -123
  345. teradataml/analytics/mle/json/pack_mle.json +0 -58
  346. teradataml/analytics/mle/json/pack_mle_mle.json +0 -58
  347. teradataml/analytics/mle/json/pagerank_mle.json +0 -81
  348. teradataml/analytics/mle/json/pathanalyzer_mle.json +0 -63
  349. teradataml/analytics/mle/json/pathgenerator_mle.json +0 -40
  350. teradataml/analytics/mle/json/pathstart_mle.json +0 -62
  351. teradataml/analytics/mle/json/pathsummarizer_mle.json +0 -72
  352. teradataml/analytics/mle/json/pivoting_mle.json +0 -71
  353. teradataml/analytics/mle/json/postagger_mle.json +0 -51
  354. teradataml/analytics/mle/json/randomsample_mle.json +0 -131
  355. teradataml/analytics/mle/json/randomwalksample_mle.json +0 -85
  356. teradataml/analytics/mle/json/roc_mle.json +0 -73
  357. teradataml/analytics/mle/json/sampling_mle.json +0 -75
  358. teradataml/analytics/mle/json/sax_mle.json +0 -154
  359. teradataml/analytics/mle/json/scale_mle.json +0 -93
  360. teradataml/analytics/mle/json/scalebypartition_mle.json +0 -89
  361. teradataml/analytics/mle/json/scalemap_mle.json +0 -44
  362. teradataml/analytics/mle/json/scalesummary_mle.json +0 -14
  363. teradataml/analytics/mle/json/sentenceextractor_mle.json +0 -41
  364. teradataml/analytics/mle/json/sentimentevaluator_mle.json +0 -43
  365. teradataml/analytics/mle/json/sentimentextractor_mle.json +0 -100
  366. teradataml/analytics/mle/json/sentimenttrainer_mle.json +0 -68
  367. teradataml/analytics/mle/json/seriessplitter_mle.json +0 -133
  368. teradataml/analytics/mle/json/sessionize_mle_mle.json +0 -62
  369. teradataml/analytics/mle/json/simplemovavg_mle.json +0 -48
  370. teradataml/analytics/mle/json/stringsimilarity_mle.json +0 -50
  371. teradataml/analytics/mle/json/stringsimilarity_mle_mle.json +0 -50
  372. teradataml/analytics/mle/json/svmdense_mle.json +0 -165
  373. teradataml/analytics/mle/json/svmdensepredict_mle.json +0 -95
  374. teradataml/analytics/mle/json/svmdensesummary_mle.json +0 -58
  375. teradataml/analytics/mle/json/svmsparse_mle.json +0 -148
  376. teradataml/analytics/mle/json/svmsparsepredict_mle_mle.json +0 -103
  377. teradataml/analytics/mle/json/svmsparsesummary_mle.json +0 -57
  378. teradataml/analytics/mle/json/textchunker_mle.json +0 -40
  379. teradataml/analytics/mle/json/textclassifier_mle.json +0 -51
  380. teradataml/analytics/mle/json/textclassifierevaluator_mle.json +0 -43
  381. teradataml/analytics/mle/json/textclassifiertrainer_mle.json +0 -103
  382. teradataml/analytics/mle/json/textmorph_mle.json +0 -63
  383. teradataml/analytics/mle/json/textparser_mle.json +0 -166
  384. teradataml/analytics/mle/json/texttagger_mle.json +0 -81
  385. teradataml/analytics/mle/json/texttokenizer_mle.json +0 -91
  386. teradataml/analytics/mle/json/tf_mle.json +0 -33
  387. teradataml/analytics/mle/json/tfidf_mle.json +0 -34
  388. teradataml/analytics/mle/json/univariatestatistics_mle.json +0 -81
  389. teradataml/analytics/mle/json/unpack_mle.json +0 -91
  390. teradataml/analytics/mle/json/unpack_mle_mle.json +0 -91
  391. teradataml/analytics/mle/json/unpivoting_mle.json +0 -63
  392. teradataml/analytics/mle/json/varmax_mle.json +0 -176
  393. teradataml/analytics/mle/json/vectordistance_mle.json +0 -179
  394. teradataml/analytics/mle/json/weightedmovavg_mle.json +0 -48
  395. teradataml/analytics/mle/json/xgboost_mle.json +0 -178
  396. teradataml/analytics/mle/json/xgboostpredict_mle.json +0 -104
  397. teradataml/analytics/sqle/Antiselect.py +0 -321
  398. teradataml/analytics/sqle/Attribution.py +0 -603
  399. teradataml/analytics/sqle/DecisionForestPredict.py +0 -408
  400. teradataml/analytics/sqle/GLMPredict.py +0 -430
  401. teradataml/analytics/sqle/MovingAverage.py +0 -543
  402. teradataml/analytics/sqle/NGramSplitter.py +0 -548
  403. teradataml/analytics/sqle/NPath.py +0 -632
  404. teradataml/analytics/sqle/NaiveBayesTextClassifierPredict.py +0 -515
  405. teradataml/analytics/sqle/Pack.py +0 -388
  406. teradataml/analytics/sqle/SVMSparsePredict.py +0 -464
  407. teradataml/analytics/sqle/Sessionize.py +0 -390
  408. teradataml/analytics/sqle/StringSimilarity.py +0 -400
  409. teradataml/analytics/sqle/Unpack.py +0 -503
  410. teradataml/analytics/sqle/json/antiselect_sqle.json +0 -21
  411. teradataml/analytics/sqle/json/attribution_sqle.json +0 -92
  412. teradataml/analytics/sqle/json/decisionforestpredict_sqle.json +0 -48
  413. teradataml/analytics/sqle/json/glmpredict_sqle.json +0 -48
  414. teradataml/analytics/sqle/json/h2opredict_sqle.json +0 -63
  415. teradataml/analytics/sqle/json/movingaverage_sqle.json +0 -58
  416. teradataml/analytics/sqle/json/naivebayestextclassifierpredict_sqle.json +0 -76
  417. teradataml/analytics/sqle/json/ngramsplitter_sqle.json +0 -126
  418. teradataml/analytics/sqle/json/npath_sqle.json +0 -67
  419. teradataml/analytics/sqle/json/pack_sqle.json +0 -47
  420. teradataml/analytics/sqle/json/pmmlpredict_sqle.json +0 -55
  421. teradataml/analytics/sqle/json/sessionize_sqle.json +0 -43
  422. teradataml/analytics/sqle/json/stringsimilarity_sqle.json +0 -39
  423. teradataml/analytics/sqle/json/svmsparsepredict_sqle.json +0 -74
  424. teradataml/analytics/sqle/json/unpack_sqle.json +0 -80
  425. teradataml/catalog/model_cataloging.py +0 -980
  426. teradataml/config/mlengine_alias_definitions_v1.0 +0 -118
  427. teradataml/config/mlengine_alias_definitions_v1.1 +0 -127
  428. teradataml/config/mlengine_alias_definitions_v1.3 +0 -129
  429. teradataml/table_operators/sandbox_container_util.py +0 -643
  430. {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/WHEEL +0 -0
  431. {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/top_level.txt +0 -0
  432. {teradataml-17.20.0.6.dist-info → teradataml-20.0.0.0.dist-info}/zip-safe +0 -0
teradataml/automl/custom_json_utils.py
@@ -0,0 +1,1270 @@
+ # ##################################################################
+ #
+ # Copyright 2024 Teradata. All rights reserved.
+ # TERADATA CONFIDENTIAL AND TRADE SECRET
+ #
+ # Primary Owner: Sweta Shaw
+ # Email Id: Sweta.Shaw@Teradata.com
+ #
+ # Secondary Owner: Akhil Bisht
+ # Email Id: AKHIL.BISHT@Teradata.com
+ #
+ # Version: 1.1
+ # Function Version: 1.0
+ # ##################################################################
+
+ import json
+
+
+ class _GenerateCustomJson:
+
+     def __init__(self):
+         """
+         DESCRIPTION:
+             Function initializes the data and flags for custom JSON file generation.
+         """
+         # Initializing data dictionary for storing custom parameters
+         self.data = {}
+         # Initializing first-time execution flag variables for each phase
+         self.fe_flag = {index: False for index in range(1, 8)}
+         self.de_flag = {index: False for index in range(1, 5)}
+         self.mt_flag = {index: False for index in range(1, 2)}
+     def _process_list_input(self,
+                             input_data,
+                             value_type='str',
+                             allowed_values=None):
+         """
+         DESCRIPTION:
+             Function processes input data containing one or more values, expected
+             to be comma separated, and converts them into a list of the specified type.
+
+         PARAMETERS:
+             input_data:
+                 Required Argument.
+                 Specifies the input data to be processed.
+                 Types: str
+
+             value_type:
+                 Optional Argument.
+                 Specifies the type of value present in input data.
+                 Default Value: "str"
+                 Types: str
+
+             allowed_values:
+                 Optional Argument.
+                 Specifies the list of allowed values for input data.
+                 Default Value: None
+                 Types: list
+
+         RETURNS:
+             List containing values of the specified type.
+
+         RAISES:
+             ValueError: If input data is empty or not valid.
+         """
+         while True:
+             try:
+                 # Checking if input is empty
+                 if not input_data.strip():
+                     raise ValueError("\nInput data cannot be empty. "
+                                      "Please provide a valid comma separated input.")
+                 # Processing multi-valued input data
+                 if value_type == 'int':
+                     result = [int(value.strip()) for value in input_data.split(',')]
+                 elif value_type == 'float':
+                     result = [float(value.strip()) for value in input_data.split(',')]
+                 elif value_type == 'bool':
+                     result = [True if value.strip().lower() == 'true' else False for value in input_data.split(',')]
+                 else:
+                     result = [value.strip() for value in input_data.split(',')]
+
+                 if allowed_values:
+                     for value in result:
+                         if value not in allowed_values:
+                             raise ValueError(f"\nInvalid input {value}. "
+                                              f"Please provide a valid input from {allowed_values}.")
+                 return result
+             # Handling exceptions for invalid input
+             except ValueError as msg:
+                 print(f"\n**ERROR:** {msg}")
+                 # Ask the user to try again
+                 input_data = input("\nEnter the correct input: ")
+
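The coercion step above is plain str.split plus per-item conversion; as a minimal standalone sketch (editor's illustration, not part of the diff):

    # How comma separated input is coerced, mirroring the branches above.
    raw = "10, 20, 30"
    as_ints = [int(v.strip()) for v in raw.split(',')]                           # [10, 20, 30]
    as_bools = [v.strip().lower() == 'true' for v in "true, False".split(',')]   # [True, False]
    # A value outside allowed_values raises ValueError, which the while loop
    # above catches before re-prompting the user.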
+     def _process_single_input(self,
+                               input_data,
+                               value_type='str',
+                               allowed_values=None):
+         """
+         DESCRIPTION:
+             Function processes input data containing a single value and
+             converts it into the specified type.
+
+         PARAMETERS:
+             input_data:
+                 Required Argument.
+                 Specifies the input data to be processed.
+                 Types: str
+
+             value_type:
+                 Optional Argument.
+                 Specifies the type of value present in input data.
+                 Default Value: "str"
+                 Types: str
+
+             allowed_values:
+                 Optional Argument.
+                 Specifies the list of allowed values for input data.
+                 Default Value: None
+                 Types: list
+
+         RETURNS:
+             Value of the specified type.
+
+         RAISES:
+             ValueError: If input data is empty or not valid.
+         """
+         while True:
+             try:
+                 # Checking if input is empty
+                 if not input_data.strip():
+                     raise ValueError("\nInput data cannot be empty. "
+                                      "Please provide a valid input.")
+                 # Processing single value input data
+                 if value_type == 'int':
+                     result = int(input_data)
+                 elif value_type == 'float':
+                     result = float(input_data)
+                 elif value_type == 'bool':
+                     result = True if input_data.lower() == 'true' else False
+                 else:
+                     result = input_data
+
+                 if allowed_values:
+                     if result not in allowed_values:
+                         raise ValueError(f"\nInvalid input {result}. "
+                                          f"Please provide a valid input from {allowed_values}.")
+                 return result
+             # Handling exceptions for invalid input
+             except ValueError as msg:
+                 print(f"\n**ERROR:** {msg}")
+                 # Ask the user to try again
+                 input_data = input("\nEnter the correct input: ")
+
+     def _generate_custom_json(self):
+         """
+         DESCRIPTION:
+             Function collects customized user input using prompts for the feature engineering,
+             data preparation and model training phases.
+
+         RETURNS:
+             Dictionary containing custom parameters to generate custom JSON file for AutoML.
+         """
+
+         print("\nGenerating custom config JSON for AutoML ...")
+
+         customize_options = {
+             1: 'Customize Feature Engineering Phase',
+             2: 'Customize Data Preparation Phase',
+             3: 'Customize Model Training Phase',
+             4: 'Generate custom json and exit'
+         }
+
+         while True:
+
+             print("\nAvailable main options for customization with corresponding indices: ")
+             print("-"*80)
+             for index, options in customize_options.items():
+                 print(f"\nIndex {index}: {options}")
+             print("-"*80)
+             # Mapping each index to corresponding functionality
+             custom_method_map = {
+                 1: self._get_customize_input_feature_engineering,
+                 2: self._get_customize_input_data_preparation,
+                 3: self._get_customize_input_model_training
+             }
+
+             # Taking required input for customizing feature engineering, data preparation and model training phases
+             phase_idx = self._process_single_input(
+                 input("\nEnter the index you want to customize: "),
+                 'int', list(customize_options.keys()))
+             # Checking if user wants to exit
+             if phase_idx == 4:
+                 print("\nGenerating custom json and exiting ...")
+                 break
+             else:
+                 # Processing each functionality for customization
+                 # Getting exit flag to exit from main menu
+                 exit_flag = custom_method_map[phase_idx]()
+                 if exit_flag:
+                     break
+
+         print("\nProcess of generating custom config file for AutoML has been completed successfully.")
+         # Returning custom parameters
+         return self.data
+
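A hedged usage sketch (not part of the diff): _GenerateCustomJson is internal to teradataml, but a driver along these lines could collect the menu choices and persist them with the json module imported at the top of the file. The output file name is illustrative, not from the source.

    gen = _GenerateCustomJson()
    params = gen._generate_custom_json()               # interactive menu loop shown above
    with open("custom_automl_config.json", "w") as f:  # hypothetical file name
        json.dump(params, f, indent=4)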
+     def _get_customize_input_feature_engineering(self):
+         """
+         DESCRIPTION:
+             Function takes user input for different functionalities to customize
+             the feature engineering phase.
+         """
+
+         print("\nCustomizing Feature Engineering Phase ...")
+         # Available options for customization of feature engineering phase
+         fe_customize_options = {
+             1: 'Customize Missing Value Handling',
+             2: 'Customize Bincode Encoding',
+             3: 'Customize String Manipulation',
+             4: 'Customize Categorical Encoding',
+             5: 'Customize Mathematical Transformation',
+             6: 'Customize Nonlinear Transformation',
+             7: 'Customize Antiselect Features',
+             8: 'Back to main menu',
+             9: 'Generate custom json and exit'
+         }
+
+         while True:
+
+             print("\nAvailable options for customization of feature engineering phase with corresponding indices: ")
+             print("-"*80)
+             for index, options in fe_customize_options.items():
+                 print(f"\nIndex {index}: {options}")
+             print("-"*80)
+             # Mapping each index to corresponding functionality
+             fe_method_map = {
+                 1: self._get_customize_input_missing_value_handling,
+                 2: self._get_customize_input_bin_code_encoding,
+                 3: self._get_customize_input_string_manipulation,
+                 4: self._get_customize_input_categorical_encoding,
+                 5: self._get_customize_input_mathematical_transformation,
+                 6: self._get_customize_input_nonlinear_transformation,
+                 7: self._get_customize_input_antiselect
+             }
+
+             # Taking required input for customizing feature engineering
+             fe_phase_idx = self._process_list_input(
+                 input("\nEnter the list of indices you want to customize in feature engineering phase: "),
+                 'int', list(fe_customize_options.keys()))
+
+             # Flag variable to go back to main menu
+             fe_exit_to_main_flag = False
+             # Flag variable to exit from main menu
+             # Handling the scenario when input contains both index 8 and 9
+             fe_exit_from_main_flag = 9 in fe_phase_idx
+
+             # Processing each functionality for customization in sorted order
+             for index in sorted(fe_phase_idx):
+                 if index == 8 or index == 9:
+                     fe_exit_to_main_flag = True
+                     if index == 9:
+                         fe_exit_from_main_flag = True
+                     break
+                 fe_method_map[index](self.fe_flag[index])
+                 self.fe_flag[index] = True
+             # Checking if user wants to return to main menu
+             if fe_exit_to_main_flag:
+                 print("\nCustomization of feature engineering phase has been completed successfully.")
+                 break
+         # Returning flag to exit from main menu
+         return fe_exit_from_main_flag
+
+     def _get_customize_input_data_preparation(self):
+         """
+         DESCRIPTION:
+             Function takes user input for different functionalities to customize
+             the data preparation phase.
+         """
+         print("\nCustomizing Data Preparation Phase ...")
+         # Available options for customization of data preparation phase
+         dp_customize_options = {
+             1: 'Customize Train Test Split',
+             2: 'Customize Data Imbalance Handling',
+             3: 'Customize Outlier Handling',
+             4: 'Customize Feature Scaling',
+             5: 'Back to main menu',
+             6: 'Generate custom json and exit'
+         }
+
+         while True:
+
+             print("\nAvailable options for customization of data preparation phase with corresponding indices: ")
+             print("-"*80)
+             for index, options in dp_customize_options.items():
+                 print(f"\nIndex {index}: {options}")
+             print("-"*80)
+             # Mapping each index to corresponding functionality
+             de_method_map = {
+                 1: self._get_customize_input_train_test_split,
+                 2: self._get_customize_input_data_imbalance_handling,
+                 3: self._get_customize_input_outlier_handling,
+                 4: self._get_customize_input_feature_scaling
+             }
+
+             # Taking required input for customizing data preparation.
+             dp_phase_idx = self._process_list_input(
+                 input("\nEnter the list of indices you want to customize in data preparation phase: "),
+                 'int', list(dp_customize_options.keys()))
+
+             # Flag variable to go back to main menu
+             de_exit_to_main_flag = False
+             # Flag variable to exit from main menu
+             # Handling the scenario when input contains both index 5 and 6
+             de_exit_from_main_flag = 6 in dp_phase_idx
+
+             # Processing each functionality for customization in sorted order
+             for index in sorted(dp_phase_idx):
+                 if index == 5 or index == 6:
+                     de_exit_to_main_flag = True
+                     if index == 6:
+                         de_exit_from_main_flag = True
+                     break
+                 de_method_map[index](self.de_flag[index])
+                 self.de_flag[index] = True
+             # Checking if user wants to return to main menu
+             if de_exit_to_main_flag:
+                 print("\nCustomization of data preparation phase has been completed successfully.")
+                 break
+         # Returning flag to exit from main menu
+         return de_exit_from_main_flag
+
+     def _get_customize_input_model_training(self):
+         """
+         DESCRIPTION:
+             Function takes user input for different functionalities to customize
+             the model training phase.
+         """
+         print("\nCustomizing Model Training Phase ...")
+         # Available options for customization of model training phase
+         mt_customize_options = {
+             1: 'Customize Model Hyperparameter',
+             2: 'Back to main menu',
+             3: 'Generate custom json and exit'
+         }
+
+         while True:
+
+             print("\nAvailable options for customization of model training phase with corresponding indices: ")
+             print("-"*80)
+             for index, options in mt_customize_options.items():
+                 print(f"\nIndex {index}: {options}")
+             print("-"*80)
+
+             # Taking required input for customizing model training.
+             mt_phase_idx = self._process_list_input(
+                 input("\nEnter the list of indices you want to customize in model training phase: "),
+                 'int', list(mt_customize_options.keys()))
+
+             # Flag variable to go back to main menu
+             mt_exit_to_main_flag = False
+             # Flag variable to exit from main menu
+             # Handling the scenario when input contains both index 2 and 3
+             mt_exit_from_main_flag = 3 in mt_phase_idx
+
+             # Processing each functionality for customization in sorted order
+             for index in sorted(mt_phase_idx):
+                 if index == 1:
+                     self._get_customize_input_model_hyperparameter(self.mt_flag[index])
+                 elif index == 2 or index == 3:
+                     mt_exit_to_main_flag = True
+                     if index == 3:
+                         mt_exit_from_main_flag = True
+                     break
+                 self.mt_flag[index] = True
+             # Checking if user wants to return to main menu
+             if mt_exit_to_main_flag:
+                 print("\nCustomization of model training phase has been completed successfully.")
+                 break
+         # Returning flag to exit from main menu
+         return mt_exit_from_main_flag
+
+     def _get_customize_input_missing_value_handling(self,
+                                                     first_execution_flag=False):
+         """
+         DESCRIPTION:
+             Function takes user input to generate custom JSON parameters for missing value handling.
+
+         PARAMETERS:
+             first_execution_flag:
+                 Optional Argument.
+                 Specifies the flag to check if the function is called for the first time.
+                 Default Value: False
+                 Types: bool
+         """
+         if first_execution_flag:
+             print("\nWARNING : Reinitiated missing value handling customization. "
+                   "Overwriting the previous input.")
+
+         print("\nCustomizing Missing Value Handling ...")
+         # Setting indicator for missing value handling
+         self.data['MissingValueHandlingIndicator'] = True
+         print("\nProvide the following details to customize missing value handling:")
+         # Setting parameters for missing value handling
+         self.data['MissingValueHandlingParam'] = {}
+
+         missing_handling_methods = {1: 'Drop Columns',
+                                     2: 'Drop Rows',
+                                     3: 'Impute Missing values'}
+
+         print("\nAvailable missing value handling methods with corresponding indices: ")
+         for index, method in missing_handling_methods.items():
+             print(f"Index {index}: {method}")
+
+         missing_handling_methods_idx = self._process_list_input(
+             input("\nEnter the list of indices for missing value handling methods : "),
+             'int', list(missing_handling_methods.keys()))
+
+         for index in missing_handling_methods_idx:
+             if index == 1:
+                 # Setting indicator for dropping columns with missing values
+                 self.data['MissingValueHandlingParam']['DroppingColumnIndicator'] = True
+                 drop_col_list = self._process_list_input(
+                     input("\nEnter the feature or list of features for dropping columns with missing values: "))
+                 self.data['MissingValueHandlingParam']['DroppingColumnList'] = drop_col_list
+             elif index == 2:
+                 self.data['MissingValueHandlingParam']['DroppingRowIndicator'] = True
+                 drop_row_list = self._process_list_input(
+                     input("\nEnter the feature or list of features for dropping rows with missing values: "))
+                 self.data['MissingValueHandlingParam']['DroppingRowList'] = drop_row_list
+             elif index == 3:
+                 self.data['MissingValueHandlingParam']['ImputeMissingIndicator'] = True
+
+                 impute_methods = {1: 'Statistical Imputation',
+                                   2: 'Literal Imputation'}
+                 print("\nAvailable missing value imputation methods with corresponding indices: ")
+                 for index, method in impute_methods.items():
+                     print(f"Index {index}: {method}")
+
+                 impute_methods_idx = self._process_list_input(
+                     input("\nEnter the list of corresponding index missing value imputation methods you want to use: "),
+                     'int', list(impute_methods.keys()))
+
+                 for index in impute_methods_idx:
+                     if index == 1:
+                         stat_imp_list = self._process_list_input(
+                             input("\nEnter the feature or list of features for imputing missing values using statistic values: "))
+                         self.data['MissingValueHandlingParam']['StatImputeList'] = stat_imp_list
+
+                         # Displaying available statistical imputation methods
+                         stat_methods = {1: 'min',
+                                         2: 'max',
+                                         3: 'mean',
+                                         4: 'median',
+                                         5: 'mode'}
+                         print("\nAvailable statistical methods with corresponding indices:")
+                         for index, method in stat_methods.items():
+                             print(f"Index {index}: {method}")
+
+                         self.data['MissingValueHandlingParam']['StatImputeMethod'] = []
+                         # Setting statistical imputation methods for features
+                         for feature in stat_imp_list:
+                             method_idx = self._process_single_input(
+                                 input(f"\nEnter the index of corresponding statistic imputation "
+                                       f"method for feature {feature}: "),
+                                 'int', list(stat_methods.keys()))
+                             self.data['MissingValueHandlingParam']['StatImputeMethod'].append(stat_methods[method_idx])
+                     elif index == 2:
+                         literal_imp_list = self._process_list_input(
+                             input("\nEnter the feature or list of features for imputing missing values "
+                                   "using a specific value(Literal): "))
+                         # Setting list of features for imputing missing values using specific literal value
+                         self.data['MissingValueHandlingParam']['LiteralImputeList'] = literal_imp_list
+                         self.data['MissingValueHandlingParam']['LiteralImputeValue'] = []
+                         for feature in literal_imp_list:
+                             # Setting specific literal value for imputing missing values for each feature
+                             literal_value = self._process_single_input(
+                                 input(f"\nEnter the specific literal value for imputing missing "
+                                       f"values for feature {feature}: "))
+                             self.data['MissingValueHandlingParam']['LiteralImputeValue'].append(literal_value)
+
+         print("\nCustomization of missing value handling has been completed successfully.")
+
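For orientation, assuming a user picks methods 1 and 3, drops a column "cabin", and imputes "age" with its mean (feature names invented for illustration), the branches above would leave self.data roughly as:

    {
        "MissingValueHandlingIndicator": True,
        "MissingValueHandlingParam": {
            "DroppingColumnIndicator": True,
            "DroppingColumnList": ["cabin"],
            "ImputeMissingIndicator": True,
            "StatImputeList": ["age"],
            "StatImputeMethod": ["mean"]
        }
    }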
+     def _get_customize_input_bin_code_encoding(self,
+                                                first_execution_flag=False):
+         """
+         DESCRIPTION:
+             Function takes user input to generate custom JSON parameters for performing binning on features.
+
+         PARAMETERS:
+             first_execution_flag:
+                 Optional Argument.
+                 Specifies the flag to check if the function is called for the first time.
+                 Default Value: False
+                 Types: bool
+         """
+         if first_execution_flag:
+             print("\nWARNING : Reinitiated bincode encoding customization. "
+                   "Overwriting the previous input.")
+
+         print("\nCustomizing Bincode Encoding ...")
+         # Setting indicator for binning
+         self.data['BincodeIndicator'] = True
+         print("\nProvide the following details to customize binning and bincode encoding:")
+         self.data['BincodeParam'] = {}
+
+         # Displaying available binning methods
+         binning_methods = {1: 'Equal-Width',
+                            2: 'Variable-Width'}
+         print("\nAvailable binning methods with corresponding indices:")
+         for index, method in binning_methods.items():
+             print(f"Index {index}: {method}")
+
+         # Setting parameters for binning
+         binning_list = self._process_list_input(input("\nEnter the feature or list of features for binning: "))
+         if binning_list:
+             for feature in binning_list:
+                 # Setting parameters for binning each feature
+                 self.data['BincodeParam'][feature] = {}
+                 bin_method_idx = self._process_single_input(
+                     input(f"\nEnter the index of corresponding binning method for feature {feature}: "),
+                     'int', list(binning_methods.keys()))
+
+                 # Setting binning method and number of bins for each feature
+                 self.data['BincodeParam'][feature]["Type"] = binning_methods[bin_method_idx]
+                 num_of_bin = self._process_single_input(
+                     input(f"\nEnter the number of bins for feature {feature}: "), 'int')
+                 self.data['BincodeParam'][feature]["NumOfBins"] = num_of_bin
+
+                 # Setting parameters for each bin of feature in case of variable width binning
+                 if bin_method_idx == 2:
+                     value_type = {
+                         1: 'int',
+                         2: 'float'
+                     }
+                     print("\nAvailable value types of feature for variable binning with corresponding indices:")
+                     for index, v_type in value_type.items():
+                         print(f"Index {index}: {v_type}")
+                     # Setting parameters for each bin of feature
+                     for num in range(1, num_of_bin+1):
+                         print(f"\nProvide the range for bin {num} of feature {feature}: ")
+                         bin_num = "Bin_" + str(num)
+                         self.data['BincodeParam'][feature][bin_num] = {}
+
+                         # Setting bin value type for corresponding bin
+                         bin_value_type_idx = self._process_single_input(
+                             input(f"\nEnter the index of corresponding value type of feature {feature}: "),
+                             'int', list(value_type.keys()))
+
+                         bin_value_type = value_type[bin_value_type_idx]
+
+                         # Setting minimum value for corresponding bin
+                         self.data['BincodeParam'][feature][bin_num]['min_value'] = self._process_single_input(
+                             input(f"\nEnter the minimum value for bin {num} of feature {feature}: "),
+                             bin_value_type)
+                         # Setting maximum value for corresponding bin
+                         self.data['BincodeParam'][feature][bin_num]['max_value'] = self._process_single_input(
+                             input(f"\nEnter the maximum value for bin {num} of feature {feature}: "),
+                             bin_value_type)
+                         # Setting label for corresponding bin
+                         self.data['BincodeParam'][feature][bin_num]['label'] = self._process_single_input(
+                             input(f"\nEnter the label for bin {num} of feature {feature}: "))
+
+         print("\nCustomization of bincode encoding has been completed successfully.")
+
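Likewise a sketch of the structure built above for one variable-width feature ("age" and its bin labels are invented):

    {
        "BincodeIndicator": True,
        "BincodeParam": {
            "age": {
                "Type": "Variable-Width",
                "NumOfBins": 2,
                "Bin_1": {"min_value": 0, "max_value": 18, "label": "minor"},
                "Bin_2": {"min_value": 19, "max_value": 99, "label": "adult"}
            }
        }
    }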
+     def _get_customize_input_string_manipulation(self,
+                                                  first_execution_flag=False):
+         """
+         DESCRIPTION:
+             Function takes user input to generate custom JSON parameters for string manipulation.
+
+         PARAMETERS:
+             first_execution_flag:
+                 Optional Argument.
+                 Specifies the flag to check if the function is called for the first time.
+                 Default Value: False
+                 Types: bool
+         """
+         if first_execution_flag:
+             print("\nWARNING : Reinitiated string manipulation customization. "
+                   "Overwriting the previous input.")
+
+         print("\nCustomizing String Manipulation ...")
+         # Setting indicator for string manipulation
+         self.data['StringManipulationIndicator'] = True
+         print("\nProvide the following details to customize string manipulation:")
+         self.data['StringManipulationParam'] = {}
+         # Displaying available string manipulation methods
+         string_methods = {1: 'ToLower',
+                           2: 'ToUpper',
+                           3: 'StringCon',
+                           4: 'StringPad',
+                           5: 'Substring'}
+         print("\nAvailable string manipulation methods with corresponding indices:")
+         for index, method in string_methods.items():
+             print(f"Index {index}: {method}")
+
+         # Setting parameters for string manipulation
+         str_mnpl_list = self._process_list_input(
+             input("\nEnter the feature or list of features for string manipulation: "))
+         # Processing each feature
+         if str_mnpl_list:
+             for feature in str_mnpl_list:
+                 # Setting parameters for string manipulation for each feature
+                 self.data['StringManipulationParam'][feature] = {}
+                 str_mnpl_method_idx = self._process_single_input(
+                     input(f"\nEnter the index of corresponding string manipulation "
+                           f"method for feature {feature}: "), 'int', list(string_methods.keys()))
+                 self.data['StringManipulationParam'][feature]["StringOperation"] = \
+                     string_methods[str_mnpl_method_idx]
+                 # Setting required parameters specific to each string manipulation method
+                 if str_mnpl_method_idx in [3, 4]:
+                     str_mnpl_string = self._process_single_input(
+                         input(f"\nEnter the string value required for string manipulation "
+                               f"operation for feature {feature}: "))
+                     self.data['StringManipulationParam'][feature]["String"] = str_mnpl_string
+
+                 if str_mnpl_method_idx in [4, 5]:
+                     str_mnpl_length = self._process_single_input(
+                         input(f"\nEnter the length value required for string manipulation "
+                               f"operation for feature {feature}: "), 'int')
+                     self.data['StringManipulationParam'][feature]["StringLength"] = str_mnpl_length
+
+                 if str_mnpl_method_idx == 5:
+                     str_mnpl_start = self._process_single_input(
+                         input(f"\nEnter the start value required for string manipulation "
+                               f"operation for feature {feature}: "), 'int')
+                     self.data['StringManipulationParam'][feature]["StartIndex"] = str_mnpl_start
+
+         print("\nCustomization of string manipulation has been completed successfully.")
+
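A Substring request (index 5) records the operation plus its start and length; for an invented feature "name" the resulting entry would look roughly like:

    {
        "StringManipulationIndicator": True,
        "StringManipulationParam": {
            "name": {"StringOperation": "Substring", "StringLength": 5, "StartIndex": 1}
        }
    }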
+     def _get_customize_input_categorical_encoding(self,
+                                                   first_execution_flag=False):
+         """
+         DESCRIPTION:
+             Function takes user input to generate custom JSON parameters for categorical encoding.
+
+         PARAMETERS:
+             first_execution_flag:
+                 Optional Argument.
+                 Specifies the flag to check if the function is called for the first time.
+                 Default Value: False
+                 Types: bool
+         """
+         if first_execution_flag:
+             print("\nWARNING : Reinitiated categorical encoding customization. "
+                   "Overwriting the previous input.")
+
+         print("\nCustomizing Categorical Encoding ...")
+         # Setting indicator for categorical encoding
+         self.data['CategoricalEncodingIndicator'] = True
+         print("\nProvide the following details to customize categorical encoding:")
+         # Setting parameters for categorical encoding
+         self.data['CategoricalEncodingParam'] = {}
+
+         encoding_methods = {1: 'OneHotEncoding',
+                             2: 'OrdinalEncoding',
+                             3: 'TargetEncoding'}
+
+         print("\nAvailable categorical encoding methods with corresponding indices:")
+         for index, method in encoding_methods.items():
+             print(f"Index {index}: {method}")
+
+         encoding_methods_idx = self._process_list_input(
+             input("\nEnter the list of corresponding index categorical encoding methods you want to use: "),
+             'int', list(encoding_methods.keys()))
+
+         for index in encoding_methods_idx:
+             if index == 1:
+                 # Setting indicator for OneHotEncoding
+                 self.data['CategoricalEncodingParam']['OneHotEncodingIndicator'] = True
+                 # Setting parameters for OneHotEncoding
+                 one_hot_list = self._process_list_input(
+                     input("\nEnter the feature or list of features for OneHotEncoding: "))
+                 self.data['CategoricalEncodingParam']['OneHotEncodingList'] = one_hot_list
+             elif index == 2:
+                 # Setting indicator for OrdinalEncoding
+                 self.data['CategoricalEncodingParam']['OrdinalEncodingIndicator'] = True
+                 # Setting parameters for OrdinalEncoding
+                 ordinal_list = self._process_list_input(
+                     input("\nEnter the feature or list of features for OrdinalEncoding: "))
+                 self.data['CategoricalEncodingParam']['OrdinalEncodingList'] = ordinal_list
+             elif index == 3:
+                 # Setting indicator for TargetEncoding
+                 self.data['CategoricalEncodingParam']['TargetEncodingIndicator'] = True
+                 target_end_list = self._process_list_input(input("\nEnter the feature or list of features for TargetEncoding: "))
+                 # Setting parameters for TargetEncoding
+                 self.data['CategoricalEncodingParam']['TargetEncodingList'] = {}
+                 target_end_methods = {1: 'CBM_BETA',
+                                       2: 'CBM_DIRICHLET',
+                                       3: 'CBM_GAUSSIAN_INVERSE_GAMMA'}
+                 print("\nAvailable target encoding methods with corresponding indices:")
+                 for index, method in target_end_methods.items():
+                     print(f"Index {index}: {method}")
+
+                 # Setting parameters specific to each feature and corresponding method
+                 for feature in target_end_list:
+                     self.data['CategoricalEncodingParam']['TargetEncodingList'][feature] = {}
+                     end_method_idx = self._process_single_input(
+                         input(f"\nEnter the index of target encoding method for feature {feature}: "),
+                         'int', list(target_end_methods.keys()))
+                     # Setting target encoding method for each feature
+                     self.data['CategoricalEncodingParam']['TargetEncodingList'][feature]["encoder_method"] = \
+                         target_end_methods[end_method_idx]
+
+                     # Setting response column for target encoding method
+                     response_column = self._process_single_input(
+                         input(f"\nEnter the response column for target encoding method for feature {feature}: "))
+                     self.data['CategoricalEncodingParam']['TargetEncodingList'][feature]["response_column"] = \
+                         response_column
+
+                     # Getting specific parameter in case of CBM_DIRICHLET method
+                     if end_method_idx == 2:
+                         num_distinct_responses = self._process_single_input(
+                             input(f"\nEnter the distinct count of response column "
+                                   f"for target encoding method for feature {feature}: "), 'int')
+                         self.data['CategoricalEncodingParam']['TargetEncodingList'][feature]["num_distinct_responses"] = \
+                             num_distinct_responses
+
+         print("\nCustomization of categorical encoding has been completed successfully.")
+
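Again as an invented example, one-hot encoding "city" and target encoding "segment" against a response column "churn" would populate:

    {
        "CategoricalEncodingIndicator": True,
        "CategoricalEncodingParam": {
            "OneHotEncodingIndicator": True,
            "OneHotEncodingList": ["city"],
            "TargetEncodingIndicator": True,
            "TargetEncodingList": {
                "segment": {"encoder_method": "CBM_BETA", "response_column": "churn"}
            }
        }
    }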
+    def _get_customize_input_mathematical_transformation(self,
+                                                         first_execution_flag=False):
+        """
+        DESCRIPTION:
+            Function takes user input to generate custom JSON parameters for mathematical transformation.
+
+        PARAMETERS:
+            first_execution_flag:
+                Optional Argument.
+                Specifies the flag to check if the function is called for the first time.
+                Default Value: False
+                Types: bool
+
+        """
+        if first_execution_flag:
+            print("\nWARNING : Reinitiated mathematical transformation customization. "
+                  "Overwriting the previous input.")
+
+        print("\nCustomizing Mathematical Transformation ...")
+        # Setting indicator for mathematical transformation
+        self.data['MathameticalTransformationIndicator'] = True
+        print("\nProvide the following details to customize mathematical transformation:")
+        # Setting parameters for mathematical transformation
+        self.data['MathameticalTransformationParam'] = {}
+        mat_trans_methods = {1: 'sigmoid',
+                             2: 'sininv',
+                             3: 'log',
+                             4: 'pow',
+                             5: 'exp'}
+        print("\nAvailable mathematical transformation methods with corresponding indices:")
+        for index, method in mat_trans_methods.items():
+            print(f"Index {index}: {method}")
+
+        mat_trans_list = self._process_list_input(
+            input("\nEnter the feature or list of features for mathematical transformation: "))
+        if mat_trans_list:
+            for feature in mat_trans_list:
+                # Setting parameters for mathematical transformation specific to each feature
+                self.data['MathameticalTransformationParam'][feature] = {}
+                mat_trans_method_idx = self._process_single_input(
+                    input(f"\nEnter the index of corresponding mathematical "
+                          f"transformation method for feature {feature}: "),
+                    'int', list(mat_trans_methods.keys()))
+
+                self.data['MathameticalTransformationParam'][feature]["apply_method"] = \
+                    mat_trans_methods[mat_trans_method_idx]
+                # Setting required parameters specific to each mathematical transformation method
+                if mat_trans_method_idx == 1:
+                    sigmoid_style = self._process_single_input(
+                        input(f"\nEnter the sigmoid style required for mathematical "
+                              f"transformation for feature {feature}: "))
+                    self.data['MathameticalTransformationParam'][feature]["sigmoid_style"] = \
+                        sigmoid_style
+
+                if mat_trans_method_idx == 3:
+                    base = self._process_single_input(
+                        input(f"\nEnter the base value required for mathematical "
+                              f"transformation for feature {feature}: "), 'int')
+                    self.data['MathameticalTransformationParam'][feature]["base"] = base
+
+                if mat_trans_method_idx == 4:
+                    exponent = self._process_single_input(
+                        input(f"\nEnter the exponent value required for mathematical "
+                              f"transformation for feature {feature}: "), 'int')
+                    self.data['MathameticalTransformationParam'][feature]["exponent"] = exponent
+
+        print("\nCustomization of mathematical transformation has been completed successfully.")
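+        # A minimal sketch (feature name and values assumed) of what the method above
+        # stores, e.g. for feature 'balance' with the 'log' method and base 10:
+        #
+        #   self.data['MathameticalTransformationParam'] == {
+        #       "balance": {"apply_method": "log", "base": 10}}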
+
+    def _get_customize_input_nonlinear_transformation(self,
+                                                      first_execution_flag=False):
+        """
+        DESCRIPTION:
+            Function takes user input to generate custom JSON parameters for nonlinear transformation.
+
+        PARAMETERS:
+            first_execution_flag:
+                Optional Argument.
+                Specifies the flag to check if the function is called for the first time.
+                Default Value: False
+                Types: bool
+        """
+        if first_execution_flag:
+            print("\nWARNING : Reinitiated nonlinear transformation customization. "
+                  "Overwriting the previous input.")
+
+        print("\nCustomizing Nonlinear Transformation ...")
+        # Setting indicator for nonlinear transformation
+        self.data['NonLinearTransformationIndicator'] = True
+        print("\nProvide the following details to customize nonlinear transformation:")
+        # Setting parameters for nonlinear transformation
+        self.data['NonLinearTransformationParam'] = {}
+
+        # Getting total number of non-linear combinations
+        total_combinations = self._process_single_input(
+            input("\nEnter the number of non-linear combinations you want to make: "), 'int')
+        for num in range(1, total_combinations + 1):
+            print(f"\nProvide the details for non-linear combination {num}:")
+            # Creating combination name and setting parameters for each combination
+            combination = "Combination_" + str(num)
+            self.data['NonLinearTransformationParam'][combination] = {}
+            target_columns = self._process_list_input(
+                input(f"\nEnter the list of target features for non-linear combination {num}: "))
+            self.data['NonLinearTransformationParam'][combination]["target_columns"] = target_columns
+
+            formula = self._process_single_input(
+                input(f"\nEnter the formula for non-linear combination {num}: "))
+            self.data['NonLinearTransformationParam'][combination]["formula"] = formula
+
+            result_column = self._process_single_input(
+                input(f"\nEnter the resultant feature for non-linear combination {num}: "))
+            self.data['NonLinearTransformationParam'][combination]["result_column"] = result_column
+
+        print("\nCustomization of nonlinear transformation has been completed successfully.")
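+        # A minimal sketch (column names and formula syntax assumed) of one stored
+        # combination:
+        #
+        #   self.data['NonLinearTransformationParam']["Combination_1"] == {
+        #       "target_columns": ["x1", "x2"],
+        #       "formula": "x1 * x2",
+        #       "result_column": "x1_x2_product"}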
+
+    def _get_customize_input_antiselect(self,
+                                        first_execution_flag=False):
+        """
+        DESCRIPTION:
+            Function takes user input to generate custom JSON parameters for antiselect features.
+
+        PARAMETERS:
+            first_execution_flag:
+                Optional Argument.
+                Specifies the flag to check if the function is called for the first time.
+                Default Value: False
+                Types: bool
+        """
+        if first_execution_flag:
+            print("\nWARNING : Reinitiated antiselect customization. "
+                  "Overwriting the previous input.")
+
+        print("\nCustomizing Antiselect Features ...")
+        # Setting indicator and parameter for antiselect
+        self.data['AntiselectIndicator'] = True
+        self.data['AntiselectParam'] = self._process_list_input(
+            input("\nEnter the feature or list of features for antiselect: "))
+
+        print("\nCustomization of antiselect features has been completed successfully.")
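+        # A minimal sketch (feature names assumed): after this method runs,
+        #
+        #   self.data['AntiselectIndicator'] == True
+        #   self.data['AntiselectParam'] == ["row_id", "customer_name"]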
+
+    def _get_customize_input_train_test_split(self,
+                                              first_execution_flag=False):
+        """
+        DESCRIPTION:
+            Function takes user input to generate custom JSON parameters for train test split.
+
+        PARAMETERS:
+            first_execution_flag:
+                Optional Argument.
+                Specifies the flag to check if the function is called for the first time.
+                Default Value: False
+                Types: bool
+        """
+        if first_execution_flag:
+            print("\nWARNING : Reinitiated train test split customization. "
+                  "Overwriting the previous input.")
+
+        print("\nCustomizing Train Test Split ...")
+        # Setting indicator and parameter for customizing train test split
+        self.data['TrainTestSplitIndicator'] = True
+        self.data['TrainingSize'] = self._process_single_input(
+            input("\nEnter the train size for train test split: "), 'float')
+
+        print("\nCustomization of train test split has been completed successfully.")
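+        # A minimal sketch: entering 0.8 at the prompt yields
+        #
+        #   self.data['TrainTestSplitIndicator'] == True
+        #   self.data['TrainingSize'] == 0.8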
+
+    def _get_customize_input_data_imbalance_handling(self,
+                                                     first_execution_flag):
+        """
+        DESCRIPTION:
+            Function takes user input to generate custom JSON parameters for data imbalance handling.
+
+        PARAMETERS:
+            first_execution_flag:
+                Required Argument.
+                Specifies the flag to check if the function is called for the first time.
+                Types: bool
+
+        """
+        if first_execution_flag:
+            print("\nWARNING : Reinitiated data imbalance handling customization. "
+                  "Overwriting the previous input.")
+
+        print("\nCustomizing Data Imbalance Handling ...")
+        # Setting indicator for data imbalance handling
+        self.data['DataImbalanceIndicator'] = True
+        sampling_methods = {1: 'SMOTE',
+                            2: 'NearMiss'}
+        print("\nAvailable data sampling methods with corresponding indices:")
+        for index, method in sampling_methods.items():
+            print(f"Index {index}: {method}")
+
+        sampling_mthd_idx = self._process_single_input(
+            input("\nEnter the index of the corresponding data imbalance handling method: "),
+            'int', list(sampling_methods.keys()))
+        # Setting parameters for data imbalance handling
+        self.data['DataImbalanceMethod'] = sampling_methods[sampling_mthd_idx]
+
+        print("\nCustomization of data imbalance handling has been completed successfully.")
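+        # A minimal sketch: choosing index 1 at the prompt yields
+        #
+        #   self.data['DataImbalanceIndicator'] == True
+        #   self.data['DataImbalanceMethod'] == 'SMOTE'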
+
+    def _get_customize_input_outlier_handling(self,
+                                              first_execution_flag=False):
+        """
+        DESCRIPTION:
+            Function takes user input to generate custom JSON parameters for outlier handling.
+
+        PARAMETERS:
+            first_execution_flag:
+                Optional Argument.
+                Specifies the flag to check if the function is called for the first time.
+                Default Value: False
+                Types: bool
+
+        """
+        if first_execution_flag:
+            print("\nWARNING : Reinitiated outlier handling customization. "
+                  "Overwriting the previous input.")
+            # Discarding percentile bounds from the previous run; they are
+            # re-collected below only when the 'percentile' method is chosen.
+            keys_to_remove = ['OutlierLowerPercentile', 'OutlierUpperPercentile']
+            for key in keys_to_remove:
+                if key in self.data:
+                    del self.data[key]
+
+        print("\nCustomizing Outlier Handling ...")
+        # Setting indicator for outlier handling
+        self.data['OutlierFilterIndicator'] = True
+        outlier_methods = {1: 'percentile',
+                           2: 'tukey',
+                           3: 'carling'}
+        print("\nAvailable outlier detection methods with corresponding indices:")
+        for index, method in outlier_methods.items():
+            print(f"Index {index}: {method}")
+
+        # Setting parameters for outlier handling
+        outlier_mthd_idx = self._process_single_input(
+            input("\nEnter the index of the corresponding outlier handling method: "),
+            'int', list(outlier_methods.keys()))
+
+        self.data['OutlierFilterMethod'] = outlier_methods[outlier_mthd_idx]
+        # Setting parameters specific to method 'percentile'
+        if outlier_mthd_idx == 1:
+            self.data['OutlierLowerPercentile'] = self._process_single_input(
+                input("\nEnter the lower percentile value for outlier handling: "), 'float')
+            self.data['OutlierUpperPercentile'] = self._process_single_input(
+                input("\nEnter the upper percentile value for outlier handling: "), 'float')
+
+        # Setting parameters for outlier filtering
+        self.data['OutlierFilterParam'] = {}
+        outlier_list = self._process_list_input(
+            input("\nEnter the feature or list of features for outlier handling: "))
+
+        replacement_method = {
+            1: 'delete',
+            2: 'median',
+            3: 'Any Numeric Value'
+        }
+
+        print("\nAvailable outlier replacement methods with corresponding indices:")
+        for index, value in replacement_method.items():
+            print(f"Index {index}: {value}")
+
+        # Setting parameters specific to each feature
+        for feature in outlier_list:
+            self.data['OutlierFilterParam'][feature] = {}
+            replacement_method_idx = self._process_single_input(
+                input(f"\nEnter the index of corresponding replacement method for feature {feature}: "),
+                'int', list(replacement_method.keys()))
+
+            if replacement_method_idx != 3:
+                # Setting replacement method specific to each feature
+                self.data['OutlierFilterParam'][feature]["replacement_value"] = \
+                    replacement_method[replacement_method_idx]
+            else:
+                replacement_value_types = {1: 'int',
+                                           2: 'float'}
+                print("\nAvailable outlier replacement value types with corresponding indices:")
+                for index, value in replacement_value_types.items():
+                    print(f"Index {index}: {value}")
+
+                replacement_value = input(f"\nEnter the replacement value for handling outlier for feature {feature}: ")
+
+                value_type_idx = self._process_single_input(
+                    input(f"\nEnter the index of corresponding replacement value type for feature {feature}: "),
+                    'int', list(replacement_value_types.keys()))
+
+                # Setting replacement_value specific to each feature
+                self.data['OutlierFilterParam'][feature]["replacement_value"] = \
+                    self._process_single_input(replacement_value, replacement_value_types[value_type_idx])
+
+        print("\nCustomization of outlier handling has been completed successfully.")
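+        # A minimal sketch (feature name and values assumed) of the stored fragment when
+        # the 'percentile' method is chosen and feature 'income' gets a numeric replacement:
+        #
+        #   self.data['OutlierFilterMethod'] == 'percentile'
+        #   self.data['OutlierLowerPercentile'] == 0.05
+        #   self.data['OutlierUpperPercentile'] == 0.95
+        #   self.data['OutlierFilterParam'] == {"income": {"replacement_value": 100000.0}}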
+
+    def _get_customize_input_feature_scaling(self,
+                                             first_execution_flag=False):
+        """
+        DESCRIPTION:
+            Function takes user input to generate custom JSON parameters for feature scaling.
+
+        PARAMETERS:
+            first_execution_flag:
+                Optional Argument.
+                Specifies the flag to check if the function is called for the first time.
+                Default Value: False
+                Types: bool
+
+        """
+        if first_execution_flag:
+            print("\nWARNING : Reinitiated feature scaling customization. "
+                  "Overwriting the previous input.")
+
+        # Setting indicator for feature scaling
+        self.data['FeatureScalingIndicator'] = True
+        scaling_methods = {1: 'maxabs',
+                           2: 'mean',
+                           3: 'midrange',
+                           4: 'range',
+                           5: 'rescale',
+                           6: 'std',
+                           7: 'sum',
+                           8: 'ustd'}
+        # Displaying available methods for scaling
+        print("\nAvailable feature scaling methods with corresponding indices:")
+        for index, value in scaling_methods.items():
+            print(f"Index {index}: {value}")
+
+        # Setting parameters for feature scaling
+        scaling_methods_idx = self._process_single_input(
+            input("\nEnter the index of the corresponding feature scaling method: "),
+            'int', list(scaling_methods.keys()))
+
+        # Handling for 'rescale' method
+        if scaling_methods_idx != 5:
+            self.data['FeatureScalingMethod'] = scaling_methods[scaling_methods_idx]
+        else:
+            rescaling_params = {
+                1: 'lower-bound',
+                2: 'upper-bound'
+            }
+            # Displaying available params for rescaling
+            print("\nAvailable parameters required for rescaling with corresponding indices:")
+            for index, value in rescaling_params.items():
+                print(f"Index {index}: {value}")
+
+            rescaling_params_type = {1: 'int',
+                                     2: 'float'}
+            # Displaying available param types for rescaling
+            print("\nAvailable value types for rescaling params with corresponding indices:")
+            for index, param_type in rescaling_params_type.items():
+                print(f"Index {index}: {param_type}")
+            scaling_param_idx_list = self._process_list_input(
+                input("\nEnter the list of parameter indices for performing rescaling: "),
+                'int', list(rescaling_params.keys()))
+            # Setting parameters for lower bound and upper bound.
+            # None (rather than 0) marks a bound as "not provided", so an explicit
+            # bound of 0 is still honored when the method string is built below.
+            lb = None
+            ub = None
+            for param_idx in scaling_param_idx_list:
+                # Taking required input for lower bound
+                if param_idx == 1:
+                    lower_bound = input("\nEnter value for lower bound: ")
+                    value_type_idx = self._process_single_input(
+                        input("\nEnter the index of corresponding value type of lower bound: "),
+                        'int', list(rescaling_params_type.keys()))
+                    lb = self._process_single_input(lower_bound, rescaling_params_type[value_type_idx])
+                # Taking required input for upper bound
+                elif param_idx == 2:
+                    upper_bound = input("\nEnter value for upper bound: ")
+                    value_type_idx = self._process_single_input(
+                        input("\nEnter the index of corresponding value type of upper bound: "),
+                        'int', list(rescaling_params_type.keys()))
+                    ub = self._process_single_input(upper_bound, rescaling_params_type[value_type_idx])
+            # Creating string structure of 'rescale' method as per user input
+            if lb is not None and ub is not None:
+                scale_method = f'rescale(lb={lb}, ub={ub})'
+            elif lb is not None:
+                scale_method = f'rescale(lb={lb})'
+            else:
+                scale_method = f'rescale(ub={ub})'
+            # Setting parameters for feature scaling
+            self.data['FeatureScalingMethod'] = scale_method
+
+        print("\nCustomization of feature scaling has been completed successfully.")
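+        # A minimal sketch: choosing 'rescale' with lb=0 and ub=1 stores
+        #
+        #   self.data['FeatureScalingMethod'] == 'rescale(lb=0, ub=1)'
+        #
+        # while any other method stores its plain name, e.g. 'std'.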
+
+    def _get_allowed_hyperparameters(self, model_name):
+        """
+        DESCRIPTION:
+            Function to get allowed hyperparameters for different models.
+
+        PARAMETERS:
+            model_name:
+                Required Argument.
+                Specifies the model for which allowed hyperparameters are required.
+                Types: str
+
+        RETURNS:
+            Allowed hyperparameters for the model.
+        """
+        # Setting allowed common hyperparameters for tree-like models
+        allowed_common_hyperparameters_tree_model = {
+            1: 'min_impurity',
+            2: 'max_depth',
+            3: 'min_node_size',
+        }
+        # Setting allowed hyperparameters for xgboost model
+        allowed_hyperparameters_xgboost = {
+            **allowed_common_hyperparameters_tree_model,
+            4: 'shrinkage_factor',
+            5: 'iter_num'
+        }
+        # Setting allowed hyperparameters for decision forest model
+        allowed_hyperparameters_decision_forest = {
+            **allowed_common_hyperparameters_tree_model,
+            4: 'num_trees'
+        }
+        # Setting allowed hyperparameters for knn model
+        allowed_hyperparameters_knn = {
+            0: 'k'
+        }
+        # Setting allowed hyperparameters for svm model
+        allowed_hyperparameters_svm = {
+            1: 'alpha',
+            2: 'learning_rate',
+            3: 'initial_eta',
+            4: 'momentum',
+            5: 'iter_num_no_change',
+            6: 'iter_max',
+            7: 'batch_size'
+        }
+        # Setting allowed hyperparameters for glm model
+        allowed_hyperparameters_glm = {
+            **allowed_hyperparameters_svm,
+            8: 'tolerance',
+            9: 'nesterov',
+            10: 'intercept',
+            11: 'local_sgd_iterations'
+        }
+        # Mapping model names to their allowed hyperparameters
+        allowed_hyperparameters = {
+            'xgboost': allowed_hyperparameters_xgboost,
+            'decision_forest': allowed_hyperparameters_decision_forest,
+            'knn': allowed_hyperparameters_knn,
+            'svm': allowed_hyperparameters_svm,
+            'glm': allowed_hyperparameters_glm
+        }
+        return allowed_hyperparameters[model_name]
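+        # A minimal sketch of a lookup:
+        #
+        #   self._get_allowed_hyperparameters('xgboost')
+        #   # {1: 'min_impurity', 2: 'max_depth', 3: 'min_node_size',
+        #   #  4: 'shrinkage_factor', 5: 'iter_num'}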
+
+    def _get_allowed_hyperparameters_types(self, hyperparameter):
+        """
+        DESCRIPTION:
+            Function to map allowed hyperparameter types for different hyperparameters.
+
+        PARAMETERS:
+            hyperparameter:
+                Required Argument.
+                Specifies the hyperparameter for which allowed types are required.
+                Types: str
+
+        RETURNS:
+            Allowed hyperparameter type for the hyperparameter.
+        """
+        # Setting allowed hyperparameter types for different hyperparameters
+        allowed_hyperparameters_types = {
+            'min_impurity': 'float',
+            'max_depth': 'int',
+            'min_node_size': 'int',
+            'shrinkage_factor': 'float',
+            'iter_num': 'int',
+            'num_trees': 'int',
+            'k': 'int',
+            'alpha': 'float',
+            'learning_rate': 'str',
+            'initial_eta': 'float',
+            'momentum': 'float',
+            'iter_num_no_change': 'int',
+            'iter_max': 'int',
+            'batch_size': 'int',
+            'tolerance': 'float',
+            'nesterov': 'bool',
+            'intercept': 'bool',
+            'local_sgd_iterations': 'int'
+        }
+        return allowed_hyperparameters_types[hyperparameter]
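+        # A minimal sketch: the returned type string is what _process_list_input
+        # uses below to coerce the entered hyperparameter values, e.g.
+        #
+        #   self._get_allowed_hyperparameters_types('max_depth')   # 'int'
+        #   self._get_allowed_hyperparameters_types('nesterov')    # 'bool'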
+
+    def _get_customize_input_model_hyperparameter(self,
+                                                  first_execution_flag):
+        """
+        DESCRIPTION:
+            Function takes user input to generate custom JSON parameters for model hyperparameters.
+
+        PARAMETERS:
+            first_execution_flag:
+                Required Argument.
+                Specifies the flag to check if the function is called for the first time.
+                Types: bool
+
+        """
+        if first_execution_flag:
+            print("\nWARNING : Reinitiated model hyperparameter customization. "
+                  "Overwriting the previous input.")
+
+        print("\nCustomizing Model Hyperparameter ...")
+        # Setting indicator for model hyperparameter tuning
+        self.data['HyperparameterTuningIndicator'] = True
+        self.data['HyperparameterTuningParam'] = {}
+        all_models = {1: 'decision_forest',
+                      2: 'xgboost',
+                      3: 'knn',
+                      4: 'glm',
+                      5: 'svm'}
+        # Displaying available models for hyperparameter tuning
+        print("\nAvailable models for hyperparameter tuning with corresponding indices:")
+        for index, model in all_models.items():
+            print(f"Index {index}: {model}")
+
+        update_methods = {1: 'ADD',
+                          2: 'REPLACE'}
+        # Displaying available update methods for hyperparameter tuning
+        print("\nAvailable hyperparameter update methods with corresponding indices:")
+        for index, method in update_methods.items():
+            print(f"Index {index}: {method}")
+
+        # Getting list of models for hyperparameter tuning
+        model_idx_list = self._process_list_input(
+            input("\nEnter the list of model indices for performing hyperparameter tuning: "),
+            'int', list(all_models.keys()))
+
+        for model_index in model_idx_list:
+            # Setting parameters for hyperparameter tuning specific to each model
+            model_name = all_models[model_index]
+            self.data['HyperparameterTuningParam'][model_name] = {}
+
+            # Getting list of hyperparameters for each model
+            allowed_hyperparameters = self._get_allowed_hyperparameters(model_name)
+            print(f"\nAvailable hyperparameters for model '{model_name}' with corresponding indices:")
+            for index, hyperparameter in allowed_hyperparameters.items():
+                print(f"Index {index}: {hyperparameter}")
+
+            model_hyperparameter_idx_list = self._process_list_input(
+                input(f"\nEnter the list of hyperparameter indices for model '{model_name}': "),
+                'int', list(allowed_hyperparameters.keys()))
+
+            # Setting parameters for each hyperparameter of the model
+            for hyperparameter_idx in model_hyperparameter_idx_list:
+                hyperparameter_name = allowed_hyperparameters[hyperparameter_idx]
+                self.data['HyperparameterTuningParam'][model_name][hyperparameter_name] = {}
+                method_idx = self._process_single_input(
+                    input(f"\nEnter the index of corresponding update method for hyperparameter "
+                          f"'{hyperparameter_name}' for model '{model_name}': "), 'int', list(update_methods.keys()))
+                # Setting update method for hyperparameter
+                self.data['HyperparameterTuningParam'][model_name][hyperparameter_name]["Method"] = \
+                    update_methods[method_idx]
+
+                hyperparameter_value = input(f"\nEnter the list of values for hyperparameter "
+                                             f"'{hyperparameter_name}' for model '{model_name}': ")
+
+                hyperparameter_type = self._get_allowed_hyperparameters_types(hyperparameter_name)
+
+                # Setting hyperparameter values specific to each hyperparameter
+                self.data['HyperparameterTuningParam'][model_name][hyperparameter_name]["Value"] = \
+                    self._process_list_input(hyperparameter_value, hyperparameter_type)
+
+        print("\nCustomization of model hyperparameter has been completed successfully.")