teradataml 20.0.0.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (1208):
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +2762 -0
  4. teradataml/__init__.py +78 -0
  5. teradataml/_version.py +11 -0
  6. teradataml/analytics/Transformations.py +2996 -0
  7. teradataml/analytics/__init__.py +82 -0
  8. teradataml/analytics/analytic_function_executor.py +2416 -0
  9. teradataml/analytics/analytic_query_generator.py +1050 -0
  10. teradataml/analytics/byom/H2OPredict.py +514 -0
  11. teradataml/analytics/byom/PMMLPredict.py +437 -0
  12. teradataml/analytics/byom/__init__.py +16 -0
  13. teradataml/analytics/json_parser/__init__.py +133 -0
  14. teradataml/analytics/json_parser/analytic_functions_argument.py +1805 -0
  15. teradataml/analytics/json_parser/json_store.py +191 -0
  16. teradataml/analytics/json_parser/metadata.py +1666 -0
  17. teradataml/analytics/json_parser/utils.py +805 -0
  18. teradataml/analytics/meta_class.py +236 -0
  19. teradataml/analytics/sqle/DecisionTreePredict.py +456 -0
  20. teradataml/analytics/sqle/NaiveBayesPredict.py +420 -0
  21. teradataml/analytics/sqle/__init__.py +128 -0
  22. teradataml/analytics/sqle/json/decisiontreepredict_sqle.json +78 -0
  23. teradataml/analytics/sqle/json/naivebayespredict_sqle.json +62 -0
  24. teradataml/analytics/table_operator/__init__.py +11 -0
  25. teradataml/analytics/uaf/__init__.py +82 -0
  26. teradataml/analytics/utils.py +828 -0
  27. teradataml/analytics/valib.py +1617 -0
  28. teradataml/automl/__init__.py +5835 -0
  29. teradataml/automl/autodataprep/__init__.py +493 -0
  30. teradataml/automl/custom_json_utils.py +1625 -0
  31. teradataml/automl/data_preparation.py +1384 -0
  32. teradataml/automl/data_transformation.py +1254 -0
  33. teradataml/automl/feature_engineering.py +2273 -0
  34. teradataml/automl/feature_exploration.py +1873 -0
  35. teradataml/automl/model_evaluation.py +488 -0
  36. teradataml/automl/model_training.py +1407 -0
  37. teradataml/catalog/__init__.py +2 -0
  38. teradataml/catalog/byom.py +1759 -0
  39. teradataml/catalog/function_argument_mapper.py +859 -0
  40. teradataml/catalog/model_cataloging_utils.py +491 -0
  41. teradataml/clients/__init__.py +0 -0
  42. teradataml/clients/auth_client.py +137 -0
  43. teradataml/clients/keycloak_client.py +165 -0
  44. teradataml/clients/pkce_client.py +481 -0
  45. teradataml/common/__init__.py +1 -0
  46. teradataml/common/aed_utils.py +2078 -0
  47. teradataml/common/bulk_exposed_utils.py +113 -0
  48. teradataml/common/constants.py +1669 -0
  49. teradataml/common/deprecations.py +166 -0
  50. teradataml/common/exceptions.py +147 -0
  51. teradataml/common/formula.py +743 -0
  52. teradataml/common/garbagecollector.py +666 -0
  53. teradataml/common/logger.py +1261 -0
  54. teradataml/common/messagecodes.py +518 -0
  55. teradataml/common/messages.py +262 -0
  56. teradataml/common/pylogger.py +67 -0
  57. teradataml/common/sqlbundle.py +764 -0
  58. teradataml/common/td_coltype_code_to_tdtype.py +48 -0
  59. teradataml/common/utils.py +3166 -0
  60. teradataml/common/warnings.py +36 -0
  61. teradataml/common/wrapper_utils.py +625 -0
  62. teradataml/config/__init__.py +0 -0
  63. teradataml/config/dummy_file1.cfg +5 -0
  64. teradataml/config/dummy_file2.cfg +3 -0
  65. teradataml/config/sqlengine_alias_definitions_v1.0 +14 -0
  66. teradataml/config/sqlengine_alias_definitions_v1.1 +20 -0
  67. teradataml/config/sqlengine_alias_definitions_v1.3 +19 -0
  68. teradataml/context/__init__.py +0 -0
  69. teradataml/context/aed_context.py +223 -0
  70. teradataml/context/context.py +1462 -0
  71. teradataml/data/A_loan.csv +19 -0
  72. teradataml/data/BINARY_REALS_LEFT.csv +11 -0
  73. teradataml/data/BINARY_REALS_RIGHT.csv +11 -0
  74. teradataml/data/B_loan.csv +49 -0
  75. teradataml/data/BuoyData2.csv +17 -0
  76. teradataml/data/CONVOLVE2_COMPLEX_LEFT.csv +5 -0
  77. teradataml/data/CONVOLVE2_COMPLEX_RIGHT.csv +5 -0
  78. teradataml/data/Convolve2RealsLeft.csv +5 -0
  79. teradataml/data/Convolve2RealsRight.csv +5 -0
  80. teradataml/data/Convolve2ValidLeft.csv +11 -0
  81. teradataml/data/Convolve2ValidRight.csv +11 -0
  82. teradataml/data/DFFTConv_Real_8_8.csv +65 -0
  83. teradataml/data/Employee.csv +5 -0
  84. teradataml/data/Employee_Address.csv +4 -0
  85. teradataml/data/Employee_roles.csv +5 -0
  86. teradataml/data/JulesBelvezeDummyData.csv +100 -0
  87. teradataml/data/Mall_customer_data.csv +201 -0
  88. teradataml/data/Orders1_12mf.csv +25 -0
  89. teradataml/data/Pi_loan.csv +7 -0
  90. teradataml/data/SMOOTHED_DATA.csv +7 -0
  91. teradataml/data/TestDFFT8.csv +9 -0
  92. teradataml/data/TestRiver.csv +109 -0
  93. teradataml/data/Traindata.csv +28 -0
  94. teradataml/data/__init__.py +0 -0
  95. teradataml/data/acf.csv +17 -0
  96. teradataml/data/adaboost_example.json +34 -0
  97. teradataml/data/adaboostpredict_example.json +24 -0
  98. teradataml/data/additional_table.csv +11 -0
  99. teradataml/data/admissions_test.csv +21 -0
  100. teradataml/data/admissions_train.csv +41 -0
  101. teradataml/data/admissions_train_nulls.csv +41 -0
  102. teradataml/data/advertising.csv +201 -0
  103. teradataml/data/ageandheight.csv +13 -0
  104. teradataml/data/ageandpressure.csv +31 -0
  105. teradataml/data/amazon_reviews_25.csv +26 -0
  106. teradataml/data/antiselect_example.json +36 -0
  107. teradataml/data/antiselect_input.csv +8 -0
  108. teradataml/data/antiselect_input_mixed_case.csv +8 -0
  109. teradataml/data/applicant_external.csv +7 -0
  110. teradataml/data/applicant_reference.csv +7 -0
  111. teradataml/data/apriori_example.json +22 -0
  112. teradataml/data/arima_example.json +9 -0
  113. teradataml/data/assortedtext_input.csv +8 -0
  114. teradataml/data/attribution_example.json +34 -0
  115. teradataml/data/attribution_sample_table.csv +27 -0
  116. teradataml/data/attribution_sample_table1.csv +6 -0
  117. teradataml/data/attribution_sample_table2.csv +11 -0
  118. teradataml/data/bank_churn.csv +10001 -0
  119. teradataml/data/bank_marketing.csv +11163 -0
  120. teradataml/data/bank_web_clicks1.csv +43 -0
  121. teradataml/data/bank_web_clicks2.csv +91 -0
  122. teradataml/data/bank_web_url.csv +85 -0
  123. teradataml/data/barrier.csv +2 -0
  124. teradataml/data/barrier_new.csv +3 -0
  125. teradataml/data/betweenness_example.json +14 -0
  126. teradataml/data/bike_sharing.csv +732 -0
  127. teradataml/data/bin_breaks.csv +8 -0
  128. teradataml/data/bin_fit_ip.csv +4 -0
  129. teradataml/data/binary_complex_left.csv +11 -0
  130. teradataml/data/binary_complex_right.csv +11 -0
  131. teradataml/data/binary_matrix_complex_left.csv +21 -0
  132. teradataml/data/binary_matrix_complex_right.csv +21 -0
  133. teradataml/data/binary_matrix_real_left.csv +21 -0
  134. teradataml/data/binary_matrix_real_right.csv +21 -0
  135. teradataml/data/blood2ageandweight.csv +26 -0
  136. teradataml/data/bmi.csv +501 -0
  137. teradataml/data/boston.csv +507 -0
  138. teradataml/data/boston2cols.csv +721 -0
  139. teradataml/data/breast_cancer.csv +570 -0
  140. teradataml/data/buoydata_mix.csv +11 -0
  141. teradataml/data/burst_data.csv +5 -0
  142. teradataml/data/burst_example.json +21 -0
  143. teradataml/data/byom_example.json +34 -0
  144. teradataml/data/bytes_table.csv +4 -0
  145. teradataml/data/cal_housing_ex_raw.csv +70 -0
  146. teradataml/data/callers.csv +7 -0
  147. teradataml/data/calls.csv +10 -0
  148. teradataml/data/cars_hist.csv +33 -0
  149. teradataml/data/cat_table.csv +25 -0
  150. teradataml/data/ccm_example.json +32 -0
  151. teradataml/data/ccm_input.csv +91 -0
  152. teradataml/data/ccm_input2.csv +13 -0
  153. teradataml/data/ccmexample.csv +101 -0
  154. teradataml/data/ccmprepare_example.json +9 -0
  155. teradataml/data/ccmprepare_input.csv +91 -0
  156. teradataml/data/cfilter_example.json +12 -0
  157. teradataml/data/changepointdetection_example.json +18 -0
  158. teradataml/data/changepointdetectionrt_example.json +8 -0
  159. teradataml/data/chi_sq.csv +3 -0
  160. teradataml/data/churn_data.csv +14 -0
  161. teradataml/data/churn_emission.csv +35 -0
  162. teradataml/data/churn_initial.csv +3 -0
  163. teradataml/data/churn_state_transition.csv +5 -0
  164. teradataml/data/citedges_2.csv +745 -0
  165. teradataml/data/citvertices_2.csv +1210 -0
  166. teradataml/data/clicks2.csv +16 -0
  167. teradataml/data/clickstream.csv +13 -0
  168. teradataml/data/clickstream1.csv +11 -0
  169. teradataml/data/closeness_example.json +16 -0
  170. teradataml/data/complaints.csv +21 -0
  171. teradataml/data/complaints_mini.csv +3 -0
  172. teradataml/data/complaints_test_tokenized.csv +353 -0
  173. teradataml/data/complaints_testtoken.csv +224 -0
  174. teradataml/data/complaints_tokens_model.csv +348 -0
  175. teradataml/data/complaints_tokens_test.csv +353 -0
  176. teradataml/data/complaints_traintoken.csv +472 -0
  177. teradataml/data/computers_category.csv +1001 -0
  178. teradataml/data/computers_test1.csv +1252 -0
  179. teradataml/data/computers_train1.csv +5009 -0
  180. teradataml/data/computers_train1_clustered.csv +5009 -0
  181. teradataml/data/confusionmatrix_example.json +9 -0
  182. teradataml/data/conversion_event_table.csv +3 -0
  183. teradataml/data/corr_input.csv +17 -0
  184. teradataml/data/correlation_example.json +11 -0
  185. teradataml/data/covid_confirm_sd.csv +83 -0
  186. teradataml/data/coxhazardratio_example.json +39 -0
  187. teradataml/data/coxph_example.json +15 -0
  188. teradataml/data/coxsurvival_example.json +28 -0
  189. teradataml/data/cpt.csv +41 -0
  190. teradataml/data/credit_ex_merged.csv +45 -0
  191. teradataml/data/creditcard_data.csv +1001 -0
  192. teradataml/data/customer_loyalty.csv +301 -0
  193. teradataml/data/customer_loyalty_newseq.csv +31 -0
  194. teradataml/data/customer_segmentation_test.csv +2628 -0
  195. teradataml/data/customer_segmentation_train.csv +8069 -0
  196. teradataml/data/dataframe_example.json +173 -0
  197. teradataml/data/decisionforest_example.json +37 -0
  198. teradataml/data/decisionforestpredict_example.json +38 -0
  199. teradataml/data/decisiontree_example.json +21 -0
  200. teradataml/data/decisiontreepredict_example.json +45 -0
  201. teradataml/data/dfft2_size4_real.csv +17 -0
  202. teradataml/data/dfft2_test_matrix16.csv +17 -0
  203. teradataml/data/dfft2conv_real_4_4.csv +65 -0
  204. teradataml/data/diabetes.csv +443 -0
  205. teradataml/data/diabetes_test.csv +89 -0
  206. teradataml/data/dict_table.csv +5 -0
  207. teradataml/data/docperterm_table.csv +4 -0
  208. teradataml/data/docs/__init__.py +1 -0
  209. teradataml/data/docs/byom/__init__.py +0 -0
  210. teradataml/data/docs/byom/docs/DataRobotPredict.py +180 -0
  211. teradataml/data/docs/byom/docs/DataikuPredict.py +217 -0
  212. teradataml/data/docs/byom/docs/H2OPredict.py +325 -0
  213. teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
  214. teradataml/data/docs/byom/docs/ONNXPredict.py +283 -0
  215. teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
  216. teradataml/data/docs/byom/docs/PMMLPredict.py +278 -0
  217. teradataml/data/docs/byom/docs/__init__.py +0 -0
  218. teradataml/data/docs/sqle/__init__.py +0 -0
  219. teradataml/data/docs/sqle/docs_17_10/Antiselect.py +83 -0
  220. teradataml/data/docs/sqle/docs_17_10/Attribution.py +200 -0
  221. teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +172 -0
  222. teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +131 -0
  223. teradataml/data/docs/sqle/docs_17_10/CategoricalSummary.py +86 -0
  224. teradataml/data/docs/sqle/docs_17_10/ChiSq.py +90 -0
  225. teradataml/data/docs/sqle/docs_17_10/ColumnSummary.py +86 -0
  226. teradataml/data/docs/sqle/docs_17_10/ConvertTo.py +96 -0
  227. teradataml/data/docs/sqle/docs_17_10/DecisionForestPredict.py +139 -0
  228. teradataml/data/docs/sqle/docs_17_10/DecisionTreePredict.py +152 -0
  229. teradataml/data/docs/sqle/docs_17_10/FTest.py +161 -0
  230. teradataml/data/docs/sqle/docs_17_10/FillRowId.py +83 -0
  231. teradataml/data/docs/sqle/docs_17_10/Fit.py +88 -0
  232. teradataml/data/docs/sqle/docs_17_10/GLMPredict.py +144 -0
  233. teradataml/data/docs/sqle/docs_17_10/GetRowsWithMissingValues.py +85 -0
  234. teradataml/data/docs/sqle/docs_17_10/GetRowsWithoutMissingValues.py +82 -0
  235. teradataml/data/docs/sqle/docs_17_10/Histogram.py +165 -0
  236. teradataml/data/docs/sqle/docs_17_10/MovingAverage.py +134 -0
  237. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +209 -0
  238. teradataml/data/docs/sqle/docs_17_10/NPath.py +266 -0
  239. teradataml/data/docs/sqle/docs_17_10/NaiveBayesPredict.py +116 -0
  240. teradataml/data/docs/sqle/docs_17_10/NaiveBayesTextClassifierPredict.py +176 -0
  241. teradataml/data/docs/sqle/docs_17_10/NumApply.py +147 -0
  242. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +135 -0
  243. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +109 -0
  244. teradataml/data/docs/sqle/docs_17_10/OutlierFilterFit.py +166 -0
  245. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +105 -0
  246. teradataml/data/docs/sqle/docs_17_10/Pack.py +128 -0
  247. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesFit.py +112 -0
  248. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +102 -0
  249. teradataml/data/docs/sqle/docs_17_10/QQNorm.py +105 -0
  250. teradataml/data/docs/sqle/docs_17_10/RoundColumns.py +110 -0
  251. teradataml/data/docs/sqle/docs_17_10/RowNormalizeFit.py +118 -0
  252. teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +99 -0
  253. teradataml/data/docs/sqle/docs_17_10/SVMSparsePredict.py +153 -0
  254. teradataml/data/docs/sqle/docs_17_10/ScaleFit.py +197 -0
  255. teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +99 -0
  256. teradataml/data/docs/sqle/docs_17_10/Sessionize.py +114 -0
  257. teradataml/data/docs/sqle/docs_17_10/SimpleImputeFit.py +116 -0
  258. teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +98 -0
  259. teradataml/data/docs/sqle/docs_17_10/StrApply.py +187 -0
  260. teradataml/data/docs/sqle/docs_17_10/StringSimilarity.py +146 -0
  261. teradataml/data/docs/sqle/docs_17_10/Transform.py +105 -0
  262. teradataml/data/docs/sqle/docs_17_10/UnivariateStatistics.py +142 -0
  263. teradataml/data/docs/sqle/docs_17_10/Unpack.py +214 -0
  264. teradataml/data/docs/sqle/docs_17_10/WhichMax.py +83 -0
  265. teradataml/data/docs/sqle/docs_17_10/WhichMin.py +83 -0
  266. teradataml/data/docs/sqle/docs_17_10/ZTest.py +155 -0
  267. teradataml/data/docs/sqle/docs_17_10/__init__.py +0 -0
  268. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +186 -0
  269. teradataml/data/docs/sqle/docs_17_20/Antiselect.py +83 -0
  270. teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
  271. teradataml/data/docs/sqle/docs_17_20/Attribution.py +201 -0
  272. teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +172 -0
  273. teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +139 -0
  274. teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
  275. teradataml/data/docs/sqle/docs_17_20/CategoricalSummary.py +86 -0
  276. teradataml/data/docs/sqle/docs_17_20/ChiSq.py +90 -0
  277. teradataml/data/docs/sqle/docs_17_20/ClassificationEvaluator.py +166 -0
  278. teradataml/data/docs/sqle/docs_17_20/ColumnSummary.py +86 -0
  279. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +246 -0
  280. teradataml/data/docs/sqle/docs_17_20/ConvertTo.py +113 -0
  281. teradataml/data/docs/sqle/docs_17_20/DecisionForest.py +280 -0
  282. teradataml/data/docs/sqle/docs_17_20/DecisionForestPredict.py +144 -0
  283. teradataml/data/docs/sqle/docs_17_20/DecisionTreePredict.py +136 -0
  284. teradataml/data/docs/sqle/docs_17_20/FTest.py +240 -0
  285. teradataml/data/docs/sqle/docs_17_20/FillRowId.py +83 -0
  286. teradataml/data/docs/sqle/docs_17_20/Fit.py +88 -0
  287. teradataml/data/docs/sqle/docs_17_20/GLM.py +541 -0
  288. teradataml/data/docs/sqle/docs_17_20/GLMPerSegment.py +415 -0
  289. teradataml/data/docs/sqle/docs_17_20/GLMPredict.py +144 -0
  290. teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +233 -0
  291. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +125 -0
  292. teradataml/data/docs/sqle/docs_17_20/GetRowsWithMissingValues.py +109 -0
  293. teradataml/data/docs/sqle/docs_17_20/GetRowsWithoutMissingValues.py +106 -0
  294. teradataml/data/docs/sqle/docs_17_20/Histogram.py +224 -0
  295. teradataml/data/docs/sqle/docs_17_20/KMeans.py +251 -0
  296. teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +144 -0
  297. teradataml/data/docs/sqle/docs_17_20/KNN.py +215 -0
  298. teradataml/data/docs/sqle/docs_17_20/MovingAverage.py +134 -0
  299. teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
  300. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +209 -0
  301. teradataml/data/docs/sqle/docs_17_20/NPath.py +266 -0
  302. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
  303. teradataml/data/docs/sqle/docs_17_20/NaiveBayesPredict.py +116 -0
  304. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +177 -0
  305. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +127 -0
  306. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +119 -0
  307. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +112 -0
  308. teradataml/data/docs/sqle/docs_17_20/NumApply.py +147 -0
  309. teradataml/data/docs/sqle/docs_17_20/OneClassSVM.py +307 -0
  310. teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +185 -0
  311. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +231 -0
  312. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +121 -0
  313. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingFit.py +220 -0
  314. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingTransform.py +127 -0
  315. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +191 -0
  316. teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +117 -0
  317. teradataml/data/docs/sqle/docs_17_20/Pack.py +128 -0
  318. teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
  319. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesFit.py +112 -0
  320. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +112 -0
  321. teradataml/data/docs/sqle/docs_17_20/QQNorm.py +105 -0
  322. teradataml/data/docs/sqle/docs_17_20/ROC.py +164 -0
  323. teradataml/data/docs/sqle/docs_17_20/RandomProjectionFit.py +155 -0
  324. teradataml/data/docs/sqle/docs_17_20/RandomProjectionMinComponents.py +106 -0
  325. teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +120 -0
  326. teradataml/data/docs/sqle/docs_17_20/RegressionEvaluator.py +211 -0
  327. teradataml/data/docs/sqle/docs_17_20/RoundColumns.py +109 -0
  328. teradataml/data/docs/sqle/docs_17_20/RowNormalizeFit.py +118 -0
  329. teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +111 -0
  330. teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
  331. teradataml/data/docs/sqle/docs_17_20/SVM.py +414 -0
  332. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +213 -0
  333. teradataml/data/docs/sqle/docs_17_20/SVMSparsePredict.py +153 -0
  334. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +315 -0
  335. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +202 -0
  336. teradataml/data/docs/sqle/docs_17_20/SentimentExtractor.py +206 -0
  337. teradataml/data/docs/sqle/docs_17_20/Sessionize.py +114 -0
  338. teradataml/data/docs/sqle/docs_17_20/Shap.py +225 -0
  339. teradataml/data/docs/sqle/docs_17_20/Silhouette.py +153 -0
  340. teradataml/data/docs/sqle/docs_17_20/SimpleImputeFit.py +116 -0
  341. teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +109 -0
  342. teradataml/data/docs/sqle/docs_17_20/StrApply.py +187 -0
  343. teradataml/data/docs/sqle/docs_17_20/StringSimilarity.py +146 -0
  344. teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +207 -0
  345. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +333 -0
  346. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
  347. teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
  348. teradataml/data/docs/sqle/docs_17_20/TargetEncodingFit.py +267 -0
  349. teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +141 -0
  350. teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
  351. teradataml/data/docs/sqle/docs_17_20/TextParser.py +224 -0
  352. teradataml/data/docs/sqle/docs_17_20/TrainTestSplit.py +160 -0
  353. teradataml/data/docs/sqle/docs_17_20/Transform.py +123 -0
  354. teradataml/data/docs/sqle/docs_17_20/UnivariateStatistics.py +142 -0
  355. teradataml/data/docs/sqle/docs_17_20/Unpack.py +214 -0
  356. teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
  357. teradataml/data/docs/sqle/docs_17_20/VectorDistance.py +169 -0
  358. teradataml/data/docs/sqle/docs_17_20/WhichMax.py +83 -0
  359. teradataml/data/docs/sqle/docs_17_20/WhichMin.py +83 -0
  360. teradataml/data/docs/sqle/docs_17_20/WordEmbeddings.py +237 -0
  361. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +362 -0
  362. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +281 -0
  363. teradataml/data/docs/sqle/docs_17_20/ZTest.py +220 -0
  364. teradataml/data/docs/sqle/docs_17_20/__init__.py +0 -0
  365. teradataml/data/docs/tableoperator/__init__.py +0 -0
  366. teradataml/data/docs/tableoperator/docs_17_00/ReadNOS.py +430 -0
  367. teradataml/data/docs/tableoperator/docs_17_00/__init__.py +0 -0
  368. teradataml/data/docs/tableoperator/docs_17_05/ReadNOS.py +430 -0
  369. teradataml/data/docs/tableoperator/docs_17_05/WriteNOS.py +348 -0
  370. teradataml/data/docs/tableoperator/docs_17_05/__init__.py +0 -0
  371. teradataml/data/docs/tableoperator/docs_17_10/ReadNOS.py +429 -0
  372. teradataml/data/docs/tableoperator/docs_17_10/WriteNOS.py +348 -0
  373. teradataml/data/docs/tableoperator/docs_17_10/__init__.py +0 -0
  374. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  375. teradataml/data/docs/tableoperator/docs_17_20/ReadNOS.py +440 -0
  376. teradataml/data/docs/tableoperator/docs_17_20/WriteNOS.py +387 -0
  377. teradataml/data/docs/tableoperator/docs_17_20/__init__.py +0 -0
  378. teradataml/data/docs/uaf/__init__.py +0 -0
  379. teradataml/data/docs/uaf/docs_17_20/ACF.py +186 -0
  380. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +370 -0
  381. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +172 -0
  382. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +161 -0
  383. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
  384. teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
  385. teradataml/data/docs/uaf/docs_17_20/BinaryMatrixOp.py +248 -0
  386. teradataml/data/docs/uaf/docs_17_20/BinarySeriesOp.py +252 -0
  387. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +178 -0
  388. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +175 -0
  389. teradataml/data/docs/uaf/docs_17_20/Convolve.py +230 -0
  390. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +218 -0
  391. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  392. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +185 -0
  393. teradataml/data/docs/uaf/docs_17_20/DFFT.py +204 -0
  394. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +216 -0
  395. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +216 -0
  396. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +192 -0
  397. teradataml/data/docs/uaf/docs_17_20/DIFF.py +175 -0
  398. teradataml/data/docs/uaf/docs_17_20/DTW.py +180 -0
  399. teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
  400. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +217 -0
  401. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +142 -0
  402. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +184 -0
  403. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +185 -0
  404. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
  405. teradataml/data/docs/uaf/docs_17_20/FitMetrics.py +172 -0
  406. teradataml/data/docs/uaf/docs_17_20/GenseriesFormula.py +206 -0
  407. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +143 -0
  408. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +198 -0
  409. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +260 -0
  410. teradataml/data/docs/uaf/docs_17_20/IDFFT.py +165 -0
  411. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +191 -0
  412. teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
  413. teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
  414. teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
  415. teradataml/data/docs/uaf/docs_17_20/InputValidator.py +121 -0
  416. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +156 -0
  417. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +215 -0
  418. teradataml/data/docs/uaf/docs_17_20/MAMean.py +174 -0
  419. teradataml/data/docs/uaf/docs_17_20/MInfo.py +134 -0
  420. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
  421. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +145 -0
  422. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +191 -0
  423. teradataml/data/docs/uaf/docs_17_20/PACF.py +157 -0
  424. teradataml/data/docs/uaf/docs_17_20/Portman.py +217 -0
  425. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +203 -0
  426. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +155 -0
  427. teradataml/data/docs/uaf/docs_17_20/Resample.py +237 -0
  428. teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
  429. teradataml/data/docs/uaf/docs_17_20/SInfo.py +123 -0
  430. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +173 -0
  431. teradataml/data/docs/uaf/docs_17_20/SelectionCriteria.py +174 -0
  432. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +171 -0
  433. teradataml/data/docs/uaf/docs_17_20/SignifResidmean.py +164 -0
  434. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +180 -0
  435. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +208 -0
  436. teradataml/data/docs/uaf/docs_17_20/TrackingOp.py +151 -0
  437. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +171 -0
  438. teradataml/data/docs/uaf/docs_17_20/Unnormalize.py +202 -0
  439. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +171 -0
  440. teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
  441. teradataml/data/docs/uaf/docs_17_20/__init__.py +0 -0
  442. teradataml/data/dtw_example.json +18 -0
  443. teradataml/data/dtw_t1.csv +11 -0
  444. teradataml/data/dtw_t2.csv +4 -0
  445. teradataml/data/dwt2d_dataTable.csv +65 -0
  446. teradataml/data/dwt2d_example.json +16 -0
  447. teradataml/data/dwt_dataTable.csv +8 -0
  448. teradataml/data/dwt_example.json +15 -0
  449. teradataml/data/dwt_filterTable.csv +3 -0
  450. teradataml/data/dwt_filter_dim.csv +5 -0
  451. teradataml/data/emission.csv +9 -0
  452. teradataml/data/emp_table_by_dept.csv +19 -0
  453. teradataml/data/employee_info.csv +4 -0
  454. teradataml/data/employee_table.csv +6 -0
  455. teradataml/data/excluding_event_table.csv +2 -0
  456. teradataml/data/finance_data.csv +6 -0
  457. teradataml/data/finance_data2.csv +61 -0
  458. teradataml/data/finance_data3.csv +93 -0
  459. teradataml/data/finance_data4.csv +13 -0
  460. teradataml/data/fish.csv +160 -0
  461. teradataml/data/fm_blood2ageandweight.csv +26 -0
  462. teradataml/data/fmeasure_example.json +12 -0
  463. teradataml/data/followers_leaders.csv +10 -0
  464. teradataml/data/fpgrowth_example.json +12 -0
  465. teradataml/data/frequentpaths_example.json +29 -0
  466. teradataml/data/friends.csv +9 -0
  467. teradataml/data/fs_input.csv +33 -0
  468. teradataml/data/fs_input1.csv +33 -0
  469. teradataml/data/genData.csv +513 -0
  470. teradataml/data/geodataframe_example.json +40 -0
  471. teradataml/data/glass_types.csv +215 -0
  472. teradataml/data/glm_admissions_model.csv +12 -0
  473. teradataml/data/glm_example.json +56 -0
  474. teradataml/data/glml1l2_example.json +28 -0
  475. teradataml/data/glml1l2predict_example.json +54 -0
  476. teradataml/data/glmpredict_example.json +54 -0
  477. teradataml/data/gq_t1.csv +21 -0
  478. teradataml/data/grocery_transaction.csv +19 -0
  479. teradataml/data/hconvolve_complex_right.csv +5 -0
  480. teradataml/data/hconvolve_complex_rightmulti.csv +5 -0
  481. teradataml/data/histogram_example.json +12 -0
  482. teradataml/data/hmmdecoder_example.json +79 -0
  483. teradataml/data/hmmevaluator_example.json +25 -0
  484. teradataml/data/hmmsupervised_example.json +10 -0
  485. teradataml/data/hmmunsupervised_example.json +8 -0
  486. teradataml/data/hnsw_alter_data.csv +5 -0
  487. teradataml/data/hnsw_data.csv +10 -0
  488. teradataml/data/house_values.csv +12 -0
  489. teradataml/data/house_values2.csv +13 -0
  490. teradataml/data/housing_cat.csv +7 -0
  491. teradataml/data/housing_data.csv +9 -0
  492. teradataml/data/housing_test.csv +47 -0
  493. teradataml/data/housing_test_binary.csv +47 -0
  494. teradataml/data/housing_train.csv +493 -0
  495. teradataml/data/housing_train_attribute.csv +5 -0
  496. teradataml/data/housing_train_binary.csv +437 -0
  497. teradataml/data/housing_train_parameter.csv +2 -0
  498. teradataml/data/housing_train_response.csv +493 -0
  499. teradataml/data/housing_train_segment.csv +201 -0
  500. teradataml/data/ibm_stock.csv +370 -0
  501. teradataml/data/ibm_stock1.csv +370 -0
  502. teradataml/data/identitymatch_example.json +22 -0
  503. teradataml/data/idf_table.csv +4 -0
  504. teradataml/data/idwt2d_dataTable.csv +5 -0
  505. teradataml/data/idwt_dataTable.csv +8 -0
  506. teradataml/data/idwt_filterTable.csv +3 -0
  507. teradataml/data/impressions.csv +101 -0
  508. teradataml/data/inflation.csv +21 -0
  509. teradataml/data/initial.csv +3 -0
  510. teradataml/data/insect2Cols.csv +61 -0
  511. teradataml/data/insect_sprays.csv +13 -0
  512. teradataml/data/insurance.csv +1339 -0
  513. teradataml/data/interpolator_example.json +13 -0
  514. teradataml/data/interval_data.csv +5 -0
  515. teradataml/data/iris_altinput.csv +481 -0
  516. teradataml/data/iris_attribute_output.csv +8 -0
  517. teradataml/data/iris_attribute_test.csv +121 -0
  518. teradataml/data/iris_attribute_train.csv +481 -0
  519. teradataml/data/iris_category_expect_predict.csv +31 -0
  520. teradataml/data/iris_data.csv +151 -0
  521. teradataml/data/iris_input.csv +151 -0
  522. teradataml/data/iris_response_train.csv +121 -0
  523. teradataml/data/iris_test.csv +31 -0
  524. teradataml/data/iris_train.csv +121 -0
  525. teradataml/data/join_table1.csv +4 -0
  526. teradataml/data/join_table2.csv +4 -0
  527. teradataml/data/jsons/anly_function_name.json +7 -0
  528. teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
  529. teradataml/data/jsons/byom/dataikupredict.json +148 -0
  530. teradataml/data/jsons/byom/datarobotpredict.json +147 -0
  531. teradataml/data/jsons/byom/h2opredict.json +195 -0
  532. teradataml/data/jsons/byom/onnxembeddings.json +267 -0
  533. teradataml/data/jsons/byom/onnxpredict.json +187 -0
  534. teradataml/data/jsons/byom/pmmlpredict.json +147 -0
  535. teradataml/data/jsons/paired_functions.json +450 -0
  536. teradataml/data/jsons/sqle/16.20/Antiselect.json +56 -0
  537. teradataml/data/jsons/sqle/16.20/Attribution.json +249 -0
  538. teradataml/data/jsons/sqle/16.20/DecisionForestPredict.json +156 -0
  539. teradataml/data/jsons/sqle/16.20/DecisionTreePredict.json +170 -0
  540. teradataml/data/jsons/sqle/16.20/GLMPredict.json +122 -0
  541. teradataml/data/jsons/sqle/16.20/MovingAverage.json +367 -0
  542. teradataml/data/jsons/sqle/16.20/NGramSplitter.json +239 -0
  543. teradataml/data/jsons/sqle/16.20/NaiveBayesPredict.json +136 -0
  544. teradataml/data/jsons/sqle/16.20/NaiveBayesTextClassifierPredict.json +235 -0
  545. teradataml/data/jsons/sqle/16.20/Pack.json +98 -0
  546. teradataml/data/jsons/sqle/16.20/SVMSparsePredict.json +162 -0
  547. teradataml/data/jsons/sqle/16.20/Sessionize.json +105 -0
  548. teradataml/data/jsons/sqle/16.20/StringSimilarity.json +86 -0
  549. teradataml/data/jsons/sqle/16.20/Unpack.json +166 -0
  550. teradataml/data/jsons/sqle/16.20/nPath.json +269 -0
  551. teradataml/data/jsons/sqle/17.00/Antiselect.json +56 -0
  552. teradataml/data/jsons/sqle/17.00/Attribution.json +249 -0
  553. teradataml/data/jsons/sqle/17.00/DecisionForestPredict.json +156 -0
  554. teradataml/data/jsons/sqle/17.00/DecisionTreePredict.json +170 -0
  555. teradataml/data/jsons/sqle/17.00/GLMPredict.json +122 -0
  556. teradataml/data/jsons/sqle/17.00/MovingAverage.json +367 -0
  557. teradataml/data/jsons/sqle/17.00/NGramSplitter.json +239 -0
  558. teradataml/data/jsons/sqle/17.00/NaiveBayesPredict.json +136 -0
  559. teradataml/data/jsons/sqle/17.00/NaiveBayesTextClassifierPredict.json +235 -0
  560. teradataml/data/jsons/sqle/17.00/Pack.json +98 -0
  561. teradataml/data/jsons/sqle/17.00/SVMSparsePredict.json +162 -0
  562. teradataml/data/jsons/sqle/17.00/Sessionize.json +105 -0
  563. teradataml/data/jsons/sqle/17.00/StringSimilarity.json +86 -0
  564. teradataml/data/jsons/sqle/17.00/Unpack.json +166 -0
  565. teradataml/data/jsons/sqle/17.00/nPath.json +269 -0
  566. teradataml/data/jsons/sqle/17.05/Antiselect.json +56 -0
  567. teradataml/data/jsons/sqle/17.05/Attribution.json +249 -0
  568. teradataml/data/jsons/sqle/17.05/DecisionForestPredict.json +156 -0
  569. teradataml/data/jsons/sqle/17.05/DecisionTreePredict.json +170 -0
  570. teradataml/data/jsons/sqle/17.05/GLMPredict.json +122 -0
  571. teradataml/data/jsons/sqle/17.05/MovingAverage.json +367 -0
  572. teradataml/data/jsons/sqle/17.05/NGramSplitter.json +239 -0
  573. teradataml/data/jsons/sqle/17.05/NaiveBayesPredict.json +136 -0
  574. teradataml/data/jsons/sqle/17.05/NaiveBayesTextClassifierPredict.json +235 -0
  575. teradataml/data/jsons/sqle/17.05/Pack.json +98 -0
  576. teradataml/data/jsons/sqle/17.05/SVMSparsePredict.json +162 -0
  577. teradataml/data/jsons/sqle/17.05/Sessionize.json +105 -0
  578. teradataml/data/jsons/sqle/17.05/StringSimilarity.json +86 -0
  579. teradataml/data/jsons/sqle/17.05/Unpack.json +166 -0
  580. teradataml/data/jsons/sqle/17.05/nPath.json +269 -0
  581. teradataml/data/jsons/sqle/17.10/Antiselect.json +56 -0
  582. teradataml/data/jsons/sqle/17.10/Attribution.json +249 -0
  583. teradataml/data/jsons/sqle/17.10/DecisionForestPredict.json +185 -0
  584. teradataml/data/jsons/sqle/17.10/DecisionTreePredict.json +172 -0
  585. teradataml/data/jsons/sqle/17.10/GLMPredict.json +151 -0
  586. teradataml/data/jsons/sqle/17.10/MovingAverage.json +368 -0
  587. teradataml/data/jsons/sqle/17.10/NGramSplitter.json +239 -0
  588. teradataml/data/jsons/sqle/17.10/NaiveBayesPredict.json +149 -0
  589. teradataml/data/jsons/sqle/17.10/NaiveBayesTextClassifierPredict.json +288 -0
  590. teradataml/data/jsons/sqle/17.10/Pack.json +133 -0
  591. teradataml/data/jsons/sqle/17.10/SVMSparsePredict.json +193 -0
  592. teradataml/data/jsons/sqle/17.10/Sessionize.json +105 -0
  593. teradataml/data/jsons/sqle/17.10/StringSimilarity.json +86 -0
  594. teradataml/data/jsons/sqle/17.10/TD_BinCodeFit.json +239 -0
  595. teradataml/data/jsons/sqle/17.10/TD_BinCodeTransform.json +70 -0
  596. teradataml/data/jsons/sqle/17.10/TD_CategoricalSummary.json +54 -0
  597. teradataml/data/jsons/sqle/17.10/TD_Chisq.json +68 -0
  598. teradataml/data/jsons/sqle/17.10/TD_ColumnSummary.json +54 -0
  599. teradataml/data/jsons/sqle/17.10/TD_ConvertTo.json +69 -0
  600. teradataml/data/jsons/sqle/17.10/TD_FTest.json +187 -0
  601. teradataml/data/jsons/sqle/17.10/TD_FillRowID.json +52 -0
  602. teradataml/data/jsons/sqle/17.10/TD_FunctionFit.json +46 -0
  603. teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +72 -0
  604. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithMissingValues.json +53 -0
  605. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithoutMissingValues.json +53 -0
  606. teradataml/data/jsons/sqle/17.10/TD_Histogram.json +133 -0
  607. teradataml/data/jsons/sqle/17.10/TD_NumApply.json +147 -0
  608. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingFit.json +183 -0
  609. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +66 -0
  610. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterFit.json +197 -0
  611. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +48 -0
  612. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesFit.json +114 -0
  613. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +72 -0
  614. teradataml/data/jsons/sqle/17.10/TD_QQNorm.json +112 -0
  615. teradataml/data/jsons/sqle/17.10/TD_RoundColumns.json +93 -0
  616. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeFit.json +128 -0
  617. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +71 -0
  618. teradataml/data/jsons/sqle/17.10/TD_ScaleFit.json +157 -0
  619. teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +71 -0
  620. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeFit.json +148 -0
  621. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +48 -0
  622. teradataml/data/jsons/sqle/17.10/TD_StrApply.json +240 -0
  623. teradataml/data/jsons/sqle/17.10/TD_UnivariateStatistics.json +119 -0
  624. teradataml/data/jsons/sqle/17.10/TD_WhichMax.json +53 -0
  625. teradataml/data/jsons/sqle/17.10/TD_WhichMin.json +53 -0
  626. teradataml/data/jsons/sqle/17.10/TD_ZTest.json +171 -0
  627. teradataml/data/jsons/sqle/17.10/Unpack.json +188 -0
  628. teradataml/data/jsons/sqle/17.10/nPath.json +269 -0
  629. teradataml/data/jsons/sqle/17.20/Antiselect.json +56 -0
  630. teradataml/data/jsons/sqle/17.20/Attribution.json +249 -0
  631. teradataml/data/jsons/sqle/17.20/DecisionForestPredict.json +185 -0
  632. teradataml/data/jsons/sqle/17.20/DecisionTreePredict.json +172 -0
  633. teradataml/data/jsons/sqle/17.20/GLMPredict.json +151 -0
  634. teradataml/data/jsons/sqle/17.20/MovingAverage.json +367 -0
  635. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +239 -0
  636. teradataml/data/jsons/sqle/17.20/NaiveBayesPredict.json +149 -0
  637. teradataml/data/jsons/sqle/17.20/NaiveBayesTextClassifierPredict.json +287 -0
  638. teradataml/data/jsons/sqle/17.20/Pack.json +133 -0
  639. teradataml/data/jsons/sqle/17.20/SVMSparsePredict.json +192 -0
  640. teradataml/data/jsons/sqle/17.20/Sessionize.json +105 -0
  641. teradataml/data/jsons/sqle/17.20/StringSimilarity.json +86 -0
  642. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +149 -0
  643. teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
  644. teradataml/data/jsons/sqle/17.20/TD_BinCodeFit.json +239 -0
  645. teradataml/data/jsons/sqle/17.20/TD_BinCodeTransform.json +71 -0
  646. teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
  647. teradataml/data/jsons/sqle/17.20/TD_CategoricalSummary.json +53 -0
  648. teradataml/data/jsons/sqle/17.20/TD_Chisq.json +68 -0
  649. teradataml/data/jsons/sqle/17.20/TD_ClassificationEvaluator.json +146 -0
  650. teradataml/data/jsons/sqle/17.20/TD_ColumnSummary.json +53 -0
  651. teradataml/data/jsons/sqle/17.20/TD_ColumnTransformer.json +218 -0
  652. teradataml/data/jsons/sqle/17.20/TD_ConvertTo.json +92 -0
  653. teradataml/data/jsons/sqle/17.20/TD_DecisionForest.json +260 -0
  654. teradataml/data/jsons/sqle/17.20/TD_DecisionForestPredict.json +139 -0
  655. teradataml/data/jsons/sqle/17.20/TD_FTest.json +269 -0
  656. teradataml/data/jsons/sqle/17.20/TD_FillRowID.json +52 -0
  657. teradataml/data/jsons/sqle/17.20/TD_FunctionFit.json +46 -0
  658. teradataml/data/jsons/sqle/17.20/TD_FunctionTransform.json +72 -0
  659. teradataml/data/jsons/sqle/17.20/TD_GLM.json +507 -0
  660. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +168 -0
  661. teradataml/data/jsons/sqle/17.20/TD_GLMPerSegment.json +411 -0
  662. teradataml/data/jsons/sqle/17.20/TD_GLMPredictPerSegment.json +146 -0
  663. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +93 -0
  664. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithMissingValues.json +76 -0
  665. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithoutMissingValues.json +76 -0
  666. teradataml/data/jsons/sqle/17.20/TD_Histogram.json +152 -0
  667. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +232 -0
  668. teradataml/data/jsons/sqle/17.20/TD_KMeansPredict.json +87 -0
  669. teradataml/data/jsons/sqle/17.20/TD_KNN.json +262 -0
  670. teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
  671. teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
  672. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
  673. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesTextClassifierTrainer.json +137 -0
  674. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +102 -0
  675. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineTransform.json +71 -0
  676. teradataml/data/jsons/sqle/17.20/TD_NumApply.json +147 -0
  677. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +316 -0
  678. teradataml/data/jsons/sqle/17.20/TD_OneClassSVMPredict.json +124 -0
  679. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingFit.json +271 -0
  680. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingTransform.json +65 -0
  681. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingFit.json +229 -0
  682. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingTransform.json +75 -0
  683. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterFit.json +217 -0
  684. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterTransform.json +48 -0
  685. teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
  686. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesFit.json +114 -0
  687. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesTransform.json +72 -0
  688. teradataml/data/jsons/sqle/17.20/TD_QQNorm.json +111 -0
  689. teradataml/data/jsons/sqle/17.20/TD_ROC.json +179 -0
  690. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionFit.json +179 -0
  691. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionMinComponents.json +74 -0
  692. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionTransform.json +74 -0
  693. teradataml/data/jsons/sqle/17.20/TD_RegressionEvaluator.json +138 -0
  694. teradataml/data/jsons/sqle/17.20/TD_RoundColumns.json +93 -0
  695. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeFit.json +128 -0
  696. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeTransform.json +71 -0
  697. teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
  698. teradataml/data/jsons/sqle/17.20/TD_SVM.json +389 -0
  699. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +142 -0
  700. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +310 -0
  701. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +120 -0
  702. teradataml/data/jsons/sqle/17.20/TD_SentimentExtractor.json +194 -0
  703. teradataml/data/jsons/sqle/17.20/TD_Shap.json +221 -0
  704. teradataml/data/jsons/sqle/17.20/TD_Silhouette.json +143 -0
  705. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeFit.json +147 -0
  706. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeTransform.json +48 -0
  707. teradataml/data/jsons/sqle/17.20/TD_StrApply.json +240 -0
  708. teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
  709. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingFit.json +248 -0
  710. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingTransform.json +75 -0
  711. teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
  712. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +297 -0
  713. teradataml/data/jsons/sqle/17.20/TD_TrainTestSplit.json +142 -0
  714. teradataml/data/jsons/sqle/17.20/TD_UnivariateStatistics.json +117 -0
  715. teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
  716. teradataml/data/jsons/sqle/17.20/TD_VectorDistance.json +183 -0
  717. teradataml/data/jsons/sqle/17.20/TD_WhichMax.json +53 -0
  718. teradataml/data/jsons/sqle/17.20/TD_WhichMin.json +53 -0
  719. teradataml/data/jsons/sqle/17.20/TD_WordEmbeddings.json +241 -0
  720. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +330 -0
  721. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +195 -0
  722. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +247 -0
  723. teradataml/data/jsons/sqle/17.20/Unpack.json +188 -0
  724. teradataml/data/jsons/sqle/17.20/nPath.json +269 -0
  725. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +370 -0
  726. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +460 -0
  727. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +385 -0
  728. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +369 -0
  729. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +369 -0
  730. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +369 -0
  731. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +369 -0
  732. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +400 -0
  733. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +401 -0
  734. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +384 -0
  735. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +384 -0
  736. teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
  737. teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
  738. teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
  739. teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
  740. teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
  741. teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
  742. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  743. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  744. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  745. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  746. teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
  747. teradataml/data/jsons/tableoperator/17.00/read_nos.json +198 -0
  748. teradataml/data/jsons/tableoperator/17.05/read_nos.json +198 -0
  749. teradataml/data/jsons/tableoperator/17.05/write_nos.json +195 -0
  750. teradataml/data/jsons/tableoperator/17.10/read_nos.json +184 -0
  751. teradataml/data/jsons/tableoperator/17.10/write_nos.json +195 -0
  752. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  753. teradataml/data/jsons/tableoperator/17.20/read_nos.json +183 -0
  754. teradataml/data/jsons/tableoperator/17.20/write_nos.json +224 -0
  755. teradataml/data/jsons/uaf/17.20/TD_ACF.json +132 -0
  756. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +396 -0
  757. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +77 -0
  758. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +153 -0
  759. teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
  760. teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
  761. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +107 -0
  762. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +106 -0
  763. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +89 -0
  764. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +104 -0
  765. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +78 -0
  766. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +66 -0
  767. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +87 -0
  768. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +134 -0
  769. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +144 -0
  770. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +108 -0
  771. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +108 -0
  772. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +78 -0
  773. teradataml/data/jsons/uaf/17.20/TD_DIFF.json +92 -0
  774. teradataml/data/jsons/uaf/17.20/TD_DTW.json +114 -0
  775. teradataml/data/jsons/uaf/17.20/TD_DURBIN_WATSON.json +101 -0
  776. teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
  777. teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
  778. teradataml/data/jsons/uaf/17.20/TD_EXTRACT_RESULTS.json +39 -0
  779. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +101 -0
  780. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4FORMULA.json +85 -0
  781. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4SINUSOIDS.json +71 -0
  782. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +139 -0
  783. teradataml/data/jsons/uaf/17.20/TD_HOLT_WINTERS_FORECASTER.json +313 -0
  784. teradataml/data/jsons/uaf/17.20/TD_IDFFT.json +58 -0
  785. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +81 -0
  786. teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
  787. teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
  788. teradataml/data/jsons/uaf/17.20/TD_INPUTVALIDATOR.json +64 -0
  789. teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
  790. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +182 -0
  791. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +103 -0
  792. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +181 -0
  793. teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
  794. teradataml/data/jsons/uaf/17.20/TD_MATRIXMULTIPLY.json +68 -0
  795. teradataml/data/jsons/uaf/17.20/TD_MINFO.json +67 -0
  796. teradataml/data/jsons/uaf/17.20/TD_MULTIVAR_REGR.json +179 -0
  797. teradataml/data/jsons/uaf/17.20/TD_PACF.json +114 -0
  798. teradataml/data/jsons/uaf/17.20/TD_PORTMAN.json +119 -0
  799. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +175 -0
  800. teradataml/data/jsons/uaf/17.20/TD_POWERTRANSFORM.json +98 -0
  801. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +194 -0
  802. teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
  803. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +143 -0
  804. teradataml/data/jsons/uaf/17.20/TD_SELECTION_CRITERIA.json +90 -0
  805. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_PERIODICITIES.json +80 -0
  806. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_RESIDMEAN.json +68 -0
  807. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +184 -0
  808. teradataml/data/jsons/uaf/17.20/TD_SINFO.json +58 -0
  809. teradataml/data/jsons/uaf/17.20/TD_SMOOTHMA.json +163 -0
  810. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +101 -0
  811. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +112 -0
  812. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +95 -0
  813. teradataml/data/jsons/uaf/17.20/TD_WHITES_GENERAL.json +78 -0
  814. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
  815. teradataml/data/kmeans_example.json +23 -0
  816. teradataml/data/kmeans_table.csv +10 -0
  817. teradataml/data/kmeans_us_arrests_data.csv +51 -0
  818. teradataml/data/knn_example.json +19 -0
  819. teradataml/data/knnrecommender_example.json +7 -0
  820. teradataml/data/knnrecommenderpredict_example.json +12 -0
  821. teradataml/data/lar_example.json +17 -0
  822. teradataml/data/larpredict_example.json +30 -0
  823. teradataml/data/lc_new_predictors.csv +5 -0
  824. teradataml/data/lc_new_reference.csv +9 -0
  825. teradataml/data/lda_example.json +9 -0
  826. teradataml/data/ldainference_example.json +15 -0
  827. teradataml/data/ldatopicsummary_example.json +9 -0
  828. teradataml/data/levendist_input.csv +13 -0
  829. teradataml/data/levenshteindistance_example.json +10 -0
  830. teradataml/data/linreg_example.json +10 -0
  831. teradataml/data/load_example_data.py +350 -0
  832. teradataml/data/loan_prediction.csv +295 -0
  833. teradataml/data/lungcancer.csv +138 -0
  834. teradataml/data/mappingdata.csv +12 -0
  835. teradataml/data/medical_readings.csv +101 -0
  836. teradataml/data/milk_timeseries.csv +157 -0
  837. teradataml/data/min_max_titanic.csv +4 -0
  838. teradataml/data/minhash_example.json +6 -0
  839. teradataml/data/ml_ratings.csv +7547 -0
  840. teradataml/data/ml_ratings_10.csv +2445 -0
  841. teradataml/data/mobile_data.csv +13 -0
  842. teradataml/data/model1_table.csv +5 -0
  843. teradataml/data/model2_table.csv +5 -0
  844. teradataml/data/models/License_file.txt +1 -0
  845. teradataml/data/models/License_file_empty.txt +0 -0
  846. teradataml/data/models/dataiku_iris_data_ann_thin +0 -0
  847. teradataml/data/models/dr_iris_rf +0 -0
  848. teradataml/data/models/iris_db_dt_model_sklearn.onnx +0 -0
  849. teradataml/data/models/iris_db_dt_model_sklearn_floattensor.onnx +0 -0
  850. teradataml/data/models/iris_db_glm_model.pmml +57 -0
  851. teradataml/data/models/iris_db_xgb_model.pmml +4471 -0
  852. teradataml/data/models/iris_kmeans_model +0 -0
  853. teradataml/data/models/iris_mojo_glm_h2o_model +0 -0
  854. teradataml/data/models/iris_mojo_xgb_h2o_model +0 -0
  855. teradataml/data/modularity_example.json +12 -0
  856. teradataml/data/movavg_example.json +8 -0
  857. teradataml/data/mtx1.csv +7 -0
  858. teradataml/data/mtx2.csv +13 -0
  859. teradataml/data/multi_model_classification.csv +401 -0
  860. teradataml/data/multi_model_regression.csv +401 -0
  861. teradataml/data/mvdfft8.csv +9 -0
  862. teradataml/data/naivebayes_example.json +10 -0
  863. teradataml/data/naivebayespredict_example.json +19 -0
  864. teradataml/data/naivebayestextclassifier2_example.json +7 -0
  865. teradataml/data/naivebayestextclassifier_example.json +8 -0
  866. teradataml/data/naivebayestextclassifierpredict_example.json +32 -0
  867. teradataml/data/name_Find_configure.csv +10 -0
  868. teradataml/data/namedentityfinder_example.json +14 -0
  869. teradataml/data/namedentityfinderevaluator_example.json +10 -0
  870. teradataml/data/namedentityfindertrainer_example.json +6 -0
  871. teradataml/data/nb_iris_input_test.csv +31 -0
  872. teradataml/data/nb_iris_input_train.csv +121 -0
  873. teradataml/data/nbp_iris_model.csv +13 -0
  874. teradataml/data/ner_dict.csv +8 -0
  875. teradataml/data/ner_extractor_text.csv +2 -0
  876. teradataml/data/ner_input_eng.csv +7 -0
  877. teradataml/data/ner_rule.csv +5 -0
  878. teradataml/data/ner_sports_test2.csv +29 -0
  879. teradataml/data/ner_sports_train.csv +501 -0
  880. teradataml/data/nerevaluator_example.json +6 -0
  881. teradataml/data/nerextractor_example.json +18 -0
  882. teradataml/data/nermem_sports_test.csv +18 -0
  883. teradataml/data/nermem_sports_train.csv +51 -0
  884. teradataml/data/nertrainer_example.json +7 -0
  885. teradataml/data/ngrams_example.json +7 -0
  886. teradataml/data/notebooks/__init__.py +0 -0
  887. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Aggregate Functions using SQLAlchemy.ipynb +1455 -0
  888. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Arithmetic Functions Using SQLAlchemy.ipynb +1993 -0
  889. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Bit-Byte Manipulation Functions using SQLAlchemy.ipynb +1492 -0
  890. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Built-in functions using SQLAlchemy.ipynb +536 -0
  891. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Regular Expressions Using SQLAlchemy.ipynb +570 -0
  892. teradataml/data/notebooks/sqlalchemy/Teradata Vantage String Functions Using SQLAlchemy.ipynb +2559 -0
  893. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Window Aggregate Functions using SQLAlchemy.ipynb +2911 -0
  894. teradataml/data/notebooks/sqlalchemy/Using Generic SQLAlchemy ClauseElements teradataml DataFrame assign method.ipynb +698 -0
  895. teradataml/data/notebooks/sqlalchemy/__init__.py +0 -0
  896. teradataml/data/notebooks/sqlalchemy/teradataml filtering using SQLAlchemy ClauseElements.ipynb +784 -0
  897. teradataml/data/npath_example.json +23 -0
  898. teradataml/data/ntree_example.json +14 -0
  899. teradataml/data/numeric_strings.csv +5 -0
  900. teradataml/data/numerics.csv +4 -0
  901. teradataml/data/ocean_buoy.csv +17 -0
  902. teradataml/data/ocean_buoy2.csv +17 -0
  903. teradataml/data/ocean_buoys.csv +28 -0
  904. teradataml/data/ocean_buoys2.csv +10 -0
  905. teradataml/data/ocean_buoys_nonpti.csv +28 -0
  906. teradataml/data/ocean_buoys_seq.csv +29 -0
  907. teradataml/data/onehot_encoder_train.csv +4 -0
  908. teradataml/data/openml_example.json +92 -0
  909. teradataml/data/optional_event_table.csv +4 -0
  910. teradataml/data/orders1.csv +11 -0
  911. teradataml/data/orders1_12.csv +13 -0
  912. teradataml/data/orders_ex.csv +4 -0
  913. teradataml/data/pack_example.json +9 -0
  914. teradataml/data/package_tracking.csv +19 -0
  915. teradataml/data/package_tracking_pti.csv +19 -0
  916. teradataml/data/pagerank_example.json +13 -0
  917. teradataml/data/paragraphs_input.csv +6 -0
  918. teradataml/data/pathanalyzer_example.json +8 -0
  919. teradataml/data/pathgenerator_example.json +8 -0
  920. teradataml/data/patient_profile.csv +101 -0
  921. teradataml/data/pattern_matching_data.csv +11 -0
  922. teradataml/data/payment_fraud_dataset.csv +10001 -0
  923. teradataml/data/peppers.png +0 -0
  924. teradataml/data/phrases.csv +7 -0
  925. teradataml/data/pivot_example.json +9 -0
  926. teradataml/data/pivot_input.csv +22 -0
  927. teradataml/data/playerRating.csv +31 -0
  928. teradataml/data/pos_input.csv +40 -0
  929. teradataml/data/postagger_example.json +7 -0
  930. teradataml/data/posttagger_output.csv +44 -0
  931. teradataml/data/production_data.csv +17 -0
  932. teradataml/data/production_data2.csv +7 -0
  933. teradataml/data/randomsample_example.json +32 -0
  934. teradataml/data/randomwalksample_example.json +9 -0
  935. teradataml/data/rank_table.csv +6 -0
  936. teradataml/data/real_values.csv +14 -0
  937. teradataml/data/ref_mobile_data.csv +4 -0
  938. teradataml/data/ref_mobile_data_dense.csv +2 -0
  939. teradataml/data/ref_url.csv +17 -0
  940. teradataml/data/restaurant_reviews.csv +7 -0
  941. teradataml/data/retail_churn_table.csv +27772 -0
  942. teradataml/data/river_data.csv +145 -0
  943. teradataml/data/roc_example.json +8 -0
  944. teradataml/data/roc_input.csv +101 -0
  945. teradataml/data/rule_inputs.csv +6 -0
  946. teradataml/data/rule_table.csv +2 -0
  947. teradataml/data/sales.csv +7 -0
  948. teradataml/data/sales_transaction.csv +501 -0
  949. teradataml/data/salesdata.csv +342 -0
  950. teradataml/data/sample_cities.csv +3 -0
  951. teradataml/data/sample_shapes.csv +11 -0
  952. teradataml/data/sample_streets.csv +3 -0
  953. teradataml/data/sampling_example.json +16 -0
  954. teradataml/data/sax_example.json +17 -0
  955. teradataml/data/scale_attributes.csv +3 -0
  956. teradataml/data/scale_example.json +74 -0
  957. teradataml/data/scale_housing.csv +11 -0
  958. teradataml/data/scale_housing_test.csv +6 -0
  959. teradataml/data/scale_input_part_sparse.csv +31 -0
  960. teradataml/data/scale_input_partitioned.csv +16 -0
  961. teradataml/data/scale_input_sparse.csv +11 -0
  962. teradataml/data/scale_parameters.csv +3 -0
  963. teradataml/data/scale_stat.csv +11 -0
  964. teradataml/data/scalebypartition_example.json +13 -0
  965. teradataml/data/scalemap_example.json +13 -0
  966. teradataml/data/scalesummary_example.json +12 -0
  967. teradataml/data/score_category.csv +101 -0
  968. teradataml/data/score_summary.csv +4 -0
  969. teradataml/data/script_example.json +10 -0
  970. teradataml/data/scripts/deploy_script.py +84 -0
  971. teradataml/data/scripts/lightgbm/dataset.template +175 -0
  972. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +264 -0
  973. teradataml/data/scripts/lightgbm/lightgbm_function.template +234 -0
  974. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +177 -0
  975. teradataml/data/scripts/mapper.R +20 -0
  976. teradataml/data/scripts/mapper.py +16 -0
  977. teradataml/data/scripts/mapper_replace.py +16 -0
  978. teradataml/data/scripts/sklearn/__init__.py +0 -0
  979. teradataml/data/scripts/sklearn/sklearn_fit.py +205 -0
  980. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +148 -0
  981. teradataml/data/scripts/sklearn/sklearn_function.template +144 -0
  982. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +166 -0
  983. teradataml/data/scripts/sklearn/sklearn_neighbors.py +161 -0
  984. teradataml/data/scripts/sklearn/sklearn_score.py +145 -0
  985. teradataml/data/scripts/sklearn/sklearn_transform.py +327 -0
  986. teradataml/data/sdk/modelops/modelops_spec.json +101737 -0
  987. teradataml/data/seeds.csv +10 -0
  988. teradataml/data/sentenceextractor_example.json +7 -0
  989. teradataml/data/sentiment_extract_input.csv +11 -0
  990. teradataml/data/sentiment_train.csv +16 -0
  991. teradataml/data/sentiment_word.csv +20 -0
  992. teradataml/data/sentiment_word_input.csv +20 -0
  993. teradataml/data/sentimentextractor_example.json +24 -0
  994. teradataml/data/sentimenttrainer_example.json +8 -0
  995. teradataml/data/sequence_table.csv +10 -0
  996. teradataml/data/seriessplitter_example.json +8 -0
  997. teradataml/data/sessionize_example.json +17 -0
  998. teradataml/data/sessionize_table.csv +116 -0
  999. teradataml/data/setop_test1.csv +24 -0
  1000. teradataml/data/setop_test2.csv +22 -0
  1001. teradataml/data/soc_nw_edges.csv +11 -0
  1002. teradataml/data/soc_nw_vertices.csv +8 -0
  1003. teradataml/data/souvenir_timeseries.csv +168 -0
  1004. teradataml/data/sparse_iris_attribute.csv +5 -0
  1005. teradataml/data/sparse_iris_test.csv +121 -0
  1006. teradataml/data/sparse_iris_train.csv +601 -0
  1007. teradataml/data/star1.csv +6 -0
  1008. teradataml/data/star_pivot.csv +8 -0
  1009. teradataml/data/state_transition.csv +5 -0
  1010. teradataml/data/stock_data.csv +53 -0
  1011. teradataml/data/stock_movement.csv +11 -0
  1012. teradataml/data/stock_vol.csv +76 -0
  1013. teradataml/data/stop_words.csv +8 -0
  1014. teradataml/data/store_sales.csv +37 -0
  1015. teradataml/data/stringsimilarity_example.json +8 -0
  1016. teradataml/data/strsimilarity_input.csv +13 -0
  1017. teradataml/data/students.csv +101 -0
  1018. teradataml/data/svm_iris_input_test.csv +121 -0
  1019. teradataml/data/svm_iris_input_train.csv +481 -0
  1020. teradataml/data/svm_iris_model.csv +7 -0
  1021. teradataml/data/svmdense_example.json +10 -0
  1022. teradataml/data/svmdensepredict_example.json +19 -0
  1023. teradataml/data/svmsparse_example.json +8 -0
  1024. teradataml/data/svmsparsepredict_example.json +14 -0
  1025. teradataml/data/svmsparsesummary_example.json +8 -0
  1026. teradataml/data/target_mobile_data.csv +13 -0
  1027. teradataml/data/target_mobile_data_dense.csv +5 -0
  1028. teradataml/data/target_udt_data.csv +8 -0
  1029. teradataml/data/tdnerextractor_example.json +14 -0
  1030. teradataml/data/templatedata.csv +1201 -0
  1031. teradataml/data/templates/open_source_ml.json +11 -0
  1032. teradataml/data/teradata_icon.ico +0 -0
  1033. teradataml/data/teradataml_example.json +1473 -0
  1034. teradataml/data/test_classification.csv +101 -0
  1035. teradataml/data/test_loan_prediction.csv +53 -0
  1036. teradataml/data/test_pacf_12.csv +37 -0
  1037. teradataml/data/test_prediction.csv +101 -0
  1038. teradataml/data/test_regression.csv +101 -0
  1039. teradataml/data/test_river2.csv +109 -0
  1040. teradataml/data/text_inputs.csv +6 -0
  1041. teradataml/data/textchunker_example.json +8 -0
  1042. teradataml/data/textclassifier_example.json +7 -0
  1043. teradataml/data/textclassifier_input.csv +7 -0
  1044. teradataml/data/textclassifiertrainer_example.json +7 -0
  1045. teradataml/data/textmorph_example.json +11 -0
  1046. teradataml/data/textparser_example.json +15 -0
  1047. teradataml/data/texttagger_example.json +12 -0
  1048. teradataml/data/texttokenizer_example.json +7 -0
  1049. teradataml/data/texttrainer_input.csv +11 -0
  1050. teradataml/data/tf_example.json +7 -0
  1051. teradataml/data/tfidf_example.json +14 -0
  1052. teradataml/data/tfidf_input1.csv +201 -0
  1053. teradataml/data/tfidf_train.csv +6 -0
  1054. teradataml/data/time_table1.csv +535 -0
  1055. teradataml/data/time_table2.csv +14 -0
  1056. teradataml/data/timeseriesdata.csv +1601 -0
  1057. teradataml/data/timeseriesdatasetsd4.csv +105 -0
  1058. teradataml/data/timestamp_data.csv +4 -0
  1059. teradataml/data/titanic.csv +892 -0
  1060. teradataml/data/titanic_dataset_unpivoted.csv +19 -0
  1061. teradataml/data/to_num_data.csv +4 -0
  1062. teradataml/data/tochar_data.csv +5 -0
  1063. teradataml/data/token_table.csv +696 -0
  1064. teradataml/data/train_multiclass.csv +101 -0
  1065. teradataml/data/train_regression.csv +101 -0
  1066. teradataml/data/train_regression_multiple_labels.csv +101 -0
  1067. teradataml/data/train_tracking.csv +28 -0
  1068. teradataml/data/trans_dense.csv +16 -0
  1069. teradataml/data/trans_sparse.csv +55 -0
  1070. teradataml/data/transformation_table.csv +6 -0
  1071. teradataml/data/transformation_table_new.csv +2 -0
  1072. teradataml/data/tv_spots.csv +16 -0
  1073. teradataml/data/twod_climate_data.csv +117 -0
  1074. teradataml/data/uaf_example.json +529 -0
  1075. teradataml/data/univariatestatistics_example.json +9 -0
  1076. teradataml/data/unpack_example.json +10 -0
  1077. teradataml/data/unpivot_example.json +25 -0
  1078. teradataml/data/unpivot_input.csv +8 -0
  1079. teradataml/data/url_data.csv +10 -0
  1080. teradataml/data/us_air_pass.csv +37 -0
  1081. teradataml/data/us_population.csv +624 -0
  1082. teradataml/data/us_states_shapes.csv +52 -0
  1083. teradataml/data/varmax_example.json +18 -0
  1084. teradataml/data/vectordistance_example.json +30 -0
  1085. teradataml/data/ville_climatedata.csv +121 -0
  1086. teradataml/data/ville_tempdata.csv +12 -0
  1087. teradataml/data/ville_tempdata1.csv +12 -0
  1088. teradataml/data/ville_temperature.csv +11 -0
  1089. teradataml/data/waveletTable.csv +1605 -0
  1090. teradataml/data/waveletTable2.csv +1605 -0
  1091. teradataml/data/weightedmovavg_example.json +9 -0
  1092. teradataml/data/wft_testing.csv +5 -0
  1093. teradataml/data/windowdfft.csv +16 -0
  1094. teradataml/data/wine_data.csv +1600 -0
  1095. teradataml/data/word_embed_input_table1.csv +6 -0
  1096. teradataml/data/word_embed_input_table2.csv +5 -0
  1097. teradataml/data/word_embed_model.csv +23 -0
  1098. teradataml/data/words_input.csv +13 -0
  1099. teradataml/data/xconvolve_complex_left.csv +6 -0
  1100. teradataml/data/xconvolve_complex_leftmulti.csv +6 -0
  1101. teradataml/data/xgboost_example.json +36 -0
  1102. teradataml/data/xgboostpredict_example.json +32 -0
  1103. teradataml/data/ztest_example.json +16 -0
  1104. teradataml/dataframe/__init__.py +0 -0
  1105. teradataml/dataframe/copy_to.py +2446 -0
  1106. teradataml/dataframe/data_transfer.py +2840 -0
  1107. teradataml/dataframe/dataframe.py +20908 -0
  1108. teradataml/dataframe/dataframe_utils.py +2114 -0
  1109. teradataml/dataframe/fastload.py +794 -0
  1110. teradataml/dataframe/functions.py +2110 -0
  1111. teradataml/dataframe/indexer.py +424 -0
  1112. teradataml/dataframe/row.py +160 -0
  1113. teradataml/dataframe/setop.py +1171 -0
  1114. teradataml/dataframe/sql.py +10904 -0
  1115. teradataml/dataframe/sql_function_parameters.py +440 -0
  1116. teradataml/dataframe/sql_functions.py +652 -0
  1117. teradataml/dataframe/sql_interfaces.py +220 -0
  1118. teradataml/dataframe/vantage_function_types.py +675 -0
  1119. teradataml/dataframe/window.py +694 -0
  1120. teradataml/dbutils/__init__.py +3 -0
  1121. teradataml/dbutils/dbutils.py +2871 -0
  1122. teradataml/dbutils/filemgr.py +318 -0
  1123. teradataml/gen_ai/__init__.py +2 -0
  1124. teradataml/gen_ai/convAI.py +473 -0
  1125. teradataml/geospatial/__init__.py +4 -0
  1126. teradataml/geospatial/geodataframe.py +1105 -0
  1127. teradataml/geospatial/geodataframecolumn.py +392 -0
  1128. teradataml/geospatial/geometry_types.py +926 -0
  1129. teradataml/hyperparameter_tuner/__init__.py +1 -0
  1130. teradataml/hyperparameter_tuner/optimizer.py +4115 -0
  1131. teradataml/hyperparameter_tuner/utils.py +303 -0
  1132. teradataml/lib/__init__.py +0 -0
  1133. teradataml/lib/aed_0_1.dll +0 -0
  1134. teradataml/lib/libaed_0_1.dylib +0 -0
  1135. teradataml/lib/libaed_0_1.so +0 -0
  1136. teradataml/lib/libaed_0_1_aarch64.so +0 -0
  1137. teradataml/lib/libaed_0_1_ppc64le.so +0 -0
  1138. teradataml/opensource/__init__.py +1 -0
  1139. teradataml/opensource/_base.py +1321 -0
  1140. teradataml/opensource/_class.py +464 -0
  1141. teradataml/opensource/_constants.py +61 -0
  1142. teradataml/opensource/_lightgbm.py +949 -0
  1143. teradataml/opensource/_sklearn.py +1008 -0
  1144. teradataml/opensource/_wrapper_utils.py +267 -0
  1145. teradataml/options/__init__.py +148 -0
  1146. teradataml/options/configure.py +489 -0
  1147. teradataml/options/display.py +187 -0
  1148. teradataml/plot/__init__.py +3 -0
  1149. teradataml/plot/axis.py +1427 -0
  1150. teradataml/plot/constants.py +15 -0
  1151. teradataml/plot/figure.py +431 -0
  1152. teradataml/plot/plot.py +810 -0
  1153. teradataml/plot/query_generator.py +83 -0
  1154. teradataml/plot/subplot.py +216 -0
  1155. teradataml/scriptmgmt/UserEnv.py +4273 -0
  1156. teradataml/scriptmgmt/__init__.py +3 -0
  1157. teradataml/scriptmgmt/lls_utils.py +2157 -0
  1158. teradataml/sdk/README.md +79 -0
  1159. teradataml/sdk/__init__.py +4 -0
  1160. teradataml/sdk/_auth_modes.py +422 -0
  1161. teradataml/sdk/_func_params.py +487 -0
  1162. teradataml/sdk/_json_parser.py +453 -0
  1163. teradataml/sdk/_openapi_spec_constants.py +249 -0
  1164. teradataml/sdk/_utils.py +236 -0
  1165. teradataml/sdk/api_client.py +900 -0
  1166. teradataml/sdk/constants.py +62 -0
  1167. teradataml/sdk/modelops/__init__.py +98 -0
  1168. teradataml/sdk/modelops/_client.py +409 -0
  1169. teradataml/sdk/modelops/_constants.py +304 -0
  1170. teradataml/sdk/modelops/models.py +2308 -0
  1171. teradataml/sdk/spinner.py +107 -0
  1172. teradataml/series/__init__.py +0 -0
  1173. teradataml/series/series.py +537 -0
  1174. teradataml/series/series_utils.py +71 -0
  1175. teradataml/store/__init__.py +12 -0
  1176. teradataml/store/feature_store/__init__.py +0 -0
  1177. teradataml/store/feature_store/constants.py +658 -0
  1178. teradataml/store/feature_store/feature_store.py +4814 -0
  1179. teradataml/store/feature_store/mind_map.py +639 -0
  1180. teradataml/store/feature_store/models.py +7330 -0
  1181. teradataml/store/feature_store/utils.py +390 -0
  1182. teradataml/table_operators/Apply.py +979 -0
  1183. teradataml/table_operators/Script.py +1739 -0
  1184. teradataml/table_operators/TableOperator.py +1343 -0
  1185. teradataml/table_operators/__init__.py +2 -0
  1186. teradataml/table_operators/apply_query_generator.py +262 -0
  1187. teradataml/table_operators/query_generator.py +493 -0
  1188. teradataml/table_operators/table_operator_query_generator.py +462 -0
  1189. teradataml/table_operators/table_operator_util.py +726 -0
  1190. teradataml/table_operators/templates/dataframe_apply.template +184 -0
  1191. teradataml/table_operators/templates/dataframe_map.template +176 -0
  1192. teradataml/table_operators/templates/dataframe_register.template +73 -0
  1193. teradataml/table_operators/templates/dataframe_udf.template +67 -0
  1194. teradataml/table_operators/templates/script_executor.template +170 -0
  1195. teradataml/telemetry_utils/__init__.py +0 -0
  1196. teradataml/telemetry_utils/queryband.py +53 -0
  1197. teradataml/utils/__init__.py +0 -0
  1198. teradataml/utils/docstring.py +527 -0
  1199. teradataml/utils/dtypes.py +943 -0
  1200. teradataml/utils/internal_buffer.py +122 -0
  1201. teradataml/utils/print_versions.py +206 -0
  1202. teradataml/utils/utils.py +451 -0
  1203. teradataml/utils/validators.py +3305 -0
  1204. teradataml-20.0.0.8.dist-info/METADATA +2804 -0
  1205. teradataml-20.0.0.8.dist-info/RECORD +1208 -0
  1206. teradataml-20.0.0.8.dist-info/WHEEL +5 -0
  1207. teradataml-20.0.0.8.dist-info/top_level.txt +1 -0
  1208. teradataml-20.0.0.8.dist-info/zip-safe +1 -0
@@ -0,0 +1,1873 @@
1
+ # ##################################################################
2
+ #
3
+ # Copyright 2025 Teradata. All rights reserved.
4
+ # TERADATA CONFIDENTIAL AND TRADE SECRET
5
+ #
6
+ # Primary Owner: Sweta Shaw
7
+ # Email Id: Sweta.Shaw@Teradata.com
8
+ #
9
+ # Secondary Owner: Akhil Bisht
10
+ # Email Id: AKHIL.BISHT@Teradata.com
11
+ #
12
+ # Version: 1.1
13
+ # Function Version: 1.0
14
+ # ##################################################################
15
+
16
+ # Python Libraries
17
+ import pandas as pd
18
+ import matplotlib.pyplot as plt
19
+ import seaborn as sns
20
+ import numpy as np
21
+ import math
22
+
23
+ # Teradata libraries
24
+ from teradataml.dataframe.dataframe import DataFrame
25
+ from teradataml.dataframe.copy_to import copy_to_sql
26
+ from teradataml import ColumnSummary, CategoricalSummary, GetFutileColumns
27
+ from teradataml import OutlierFilterFit, OutlierFilterTransform
28
+ from teradataml import OrdinalEncodingFit, OrdinalEncodingTransform
29
+ from teradataml.hyperparameter_tuner.utils import _ProgressBar
30
+ from teradataml.common.messages import Messages, MessageCodes
31
+ from teradataml import display as dp
32
+ from teradataml.utils.validators import _Validators
33
+ from teradataml.common.utils import UtilFuncs
34
+ from teradataml.common.garbagecollector import GarbageCollector
35
+ from teradataml.common.logger import TeradataMlLogger, get_td_logger
36
+
37
+ def _is_terminal():
38
+ """
39
+ DESCRIPTION:
40
+ Common Function detects whether code is running in
41
+ terminal/console or IPython supported environment.
42
+
43
+ PARAMETERS:
44
+ None
45
+
46
+ RETURNS:
47
+ bool
48
+
49
+ RAISES:
50
+ None
51
+
52
+ EXAMPLES:
53
+ >>> is_terminal = _is_terminal()
54
+ """
55
+ if not hasattr(_is_terminal, 'ipython_imported'):
56
+ try:
57
+ # Check IPython environment
58
+ __IPYTHON__
59
+ # Check if IPython library is installed
60
+ from IPython.display import display, HTML
61
+ _is_terminal.ipython_imported = True
62
+ except (NameError, ImportError):
63
+ # If error, then terminal
64
+ _is_terminal.ipython_imported = False
65
+
66
+ return not _is_terminal.ipython_imported
67
+
68
+ # Conditional import: the IPython display helpers are only imported when not running in a terminal.
69
+ if not _is_terminal():
70
+ from IPython.display import display, HTML
71
+
72
+ @TeradataMlLogger
73
+ class _FeatureExplore:
74
+
75
def __init__(self,
             data=None,
             target_column=None,
             custom_data=None,
             verbose=0,
             task_type='regression',
             fraud=False,
             churn=False,
             cluster=False,
             **kwargs):
    """
    DESCRIPTION:
        Internal function initializes the data, target column and task
        flags used for feature exploration.

    PARAMETERS:
        data:
            Required Argument.
            Specifies the input teradataml DataFrame for feature exploration.
            Note: although the signature defaults to None, the column
                  names and types of "data" are read during initialization.
            Types: teradataml DataFrame

        target_column:
            Required Argument.
            Specifies the name of the target column in "data".
            Set to None for clustering.
            Types: str

        custom_data:
            Optional Argument.
            Specifies json object containing user customized input.
            Types: json object

        verbose:
            Optional Argument.
            Specifies the detailed execution steps based on verbose level.
            Default Value: 0
            Permitted Values:
                * 0: prints the progress bar and leaderboard
                * 1: prints the execution steps of AutoML.
                * 2: prints the intermediate data between the execution of each step of AutoML.
            Types: int

        task_type:
            Optional Argument.
            Specifies the task type of the data.
            Default Value: 'regression'
            Permitted Values:
                * 'regression'
                * 'classification'
            Types: str

        fraud:
            Optional Argument.
            Specifies whether to apply fraud detection techniques.
            Default Value: False
            Types: bool

        churn:
            Optional Argument.
            Specifies whether to apply churn prediction techniques.
            Default Value: False
            Types: bool

        cluster:
            Optional Argument.
            Specifies whether to apply clustering techniques.
            Default Value: False
            Types: bool

        **kwargs:
            Specifies the additional arguments for feature exploration.
            Accepted for signature compatibility; not read here.
            Types: dict

    RETURNS:
        None

    RAISES:
        None

    EXAMPLES:
        >>> explorer = _FeatureExplore(data=df, target_column="target", verbose=1)
    """
    # Use-case flags and task type.
    self.task_type = task_type
    self.fraud = fraud
    self.churn = churn
    self.cluster = cluster

    # Caller supplied inputs.
    self.data = data
    self.target_column = target_column
    self.custom_data = custom_data
    self.verbose = verbose

    # Starts empty; nothing in this method fills it.
    self.data_transform_dict = {}

    # Column name -> type lookup built from the (name, type) pairs of the data.
    self.data_types = dict(self.data._column_names_and_types)

    # Output environment and the shared display style.
    self.terminal_print = _is_terminal()
    self.style = self._common_style()
169
+
170
def _exploration(self,
                 **kwargs):
    """
    DESCRIPTION:
        Internal function drives the feature exploration and performs
        the following operations in order:
            1. Column summary of columns of the dataset
            2. Statistics of numeric columns of the dataset
            3. Categorical column summary (only when string columns exist)
            4. Futile columns in the dataset (only when string columns exist)
            5. Target column distribution, not applicable for clustering
            6. Outlier percentage in numeric columns of the dataset
            7. Heatmap of numerical features
            8. Boxplots of feature distribution
            9. Countplot of categorical features
            10. Scatterplot for selected features, clustering only
        Steps 7 to 10 run only when fraud, churn or cluster is set, the
        visualization libraries are available and the session is not a
        terminal.

    PARAMETERS:
        **kwargs:
            Specifies the additional arguments for exploration.
            Only "automl_phases" is read; it is forwarded to the heading display.
            Types: dict

    RETURNS:
        None

    RAISES:
        None

    EXAMPLES:
        >>> self._exploration()
    """
    numeric_cols = []
    string_cols = []
    date_cols = []

    self._display_heading(phase=0,
                          automl_phases=kwargs.get('automl_phases', None))
    self._display_msg(msg="Feature Exploration started")

    # Route every column into its bucket based on the reported type name;
    # columns of any other type are not tracked.
    bucket_for_type = {
        'int': numeric_cols,
        'float': numeric_cols,
        'str': string_cols,
        'datetime.date': date_cols,
        'datetime.datetime': date_cols,
    }
    for column_name, type_name in self.data._column_names_and_types:
        bucket = bucket_for_type.get(type_name)
        if bucket is not None:
            bucket.append(column_name)

    # Initial size of the data.
    self._display_msg(msg='Data Overview:', show_data=True)
    self._logger.info(f"Total Rows in the data: {self.data.shape[0]}")
    self._logger.info(f"Total Columns in the data: {self.data.shape[1]}")

    # Date columns, when any were identified.
    if date_cols:
        self._display_msg(msg='Identified Date Columns:',
                          data=date_cols)

    # Per-column summary followed by descriptive statistics.
    self._column_summary()
    self._statistics()

    # Categorical summary and futile column detection.
    if string_cols:
        summary_obj = self._categorical_summary(string_cols)
        self._futile_column(summary_obj)

    # Target distribution is meaningless for clustering.
    if not self.cluster:
        self._target_column_details()

    # Outlier percentage: percentile bounds for fraud/churn, Tukey otherwise.
    if self.fraud or self.churn:
        self._outlier_detection("percentile", numeric_cols,
                                lower_percentile=0.01, upper_percentile=0.99)
    else:
        self._outlier_detection("Tukey", numeric_cols)

    # Remaining steps are plots.
    wants_plots = self.fraud or self.churn or self.cluster
    if not (wants_plots and self._check_visualization_libraries() and not _is_terminal()):
        return

    # Convert to pandas once and share it across all plotting helpers.
    pandas_data = self.data.to_pandas().reset_index()

    self._boxplot_heatmap(plot_data=pandas_data)
    self._countplot_categorical_distribution(plot_data=pandas_data)

    if self.cluster:
        self._scatter_plot(plot_data=pandas_data)
267
+
268
+ def _statistics(self):
269
+ """
270
+ DESCRIPTION:
271
+ Internal function displays the statistics of numeric columns such mean, mode, median.
272
+
273
+ PARAMETERS:
274
+ None
275
+
276
+ RETURNS:
277
+ None
278
+
279
+ RAISES:
280
+ None
281
+
282
+ EXAMPLES:
283
+ >>> self._statistics()
284
+ """
285
+ # Statistics of numerical columns
286
+ self._display_msg(msg='Statistics of Data:',
287
+ data=self.data.describe(),
288
+ show_data=True)
289
+
290
def _column_summary(self):
    """
    DESCRIPTION:
        Internal function displays the ColumnSummary output for every column
        of the dataset (per the original notes: datatype, null count,
        non null count, zero count).
        The display option "max_rows" is temporarily raised to the number of
        columns so that the whole summary is shown, and is restored to its
        previous value afterwards, even if the summary fails.

    PARAMETERS:
        None

    RETURNS:
        None

    RAISES:
        None

    EXAMPLES:
        >>> self._column_summary()
    """
    # Remember the current setting instead of assuming it was 10, so that a
    # user-configured value is not clobbered.
    previous_max_rows = dp.max_rows
    dp.max_rows = self.data.shape[1]
    try:
        # Column Summary of all columns of dataset
        obj = ColumnSummary(data=self.data,
                            target_columns=self.data.columns)
        self._display_msg(msg='Column Summary:',
                          data=obj.result,
                          show_data=True)
    finally:
        # Always undo the global display change.
        dp.max_rows = previous_max_rows
316
+
317
def _categorical_summary(self,
                         categorical_columns=None):
    """
    DESCRIPTION:
        Internal function runs CategoricalSummary on the categorical columns
        and prints, for each column, the number of distinct values found.

    PARAMETERS:
        categorical_columns:
            Required Argument.
            Specifies the categorical columns.
            Types: str or list of strings (str)

    RETURNS:
        Instance of CategoricalSummary.

    RAISES:
        None

    EXAMPLES:
        >>> obj = self._categorical_summary(categorical_columns=["category1", "category2"])
    """
    row_template = "{:<25} {:<10}"

    self._display_msg(msg='Categorical Columns with their Distinct values:',
                      show_data=True)

    # Categorical Summary of categorical columns
    obj = CategoricalSummary(data=self.data,
                             target_columns=categorical_columns)

    # "!= None" is intentional: it builds a DataFrame filter expression
    # rather than a Python identity test.
    non_null_values = obj.result[obj.result['DistinctValue'] != None]

    print(row_template.format("ColumnName", "DistinctValueCount"))
    for column in categorical_columns:
        rows_for_column = non_null_values[non_null_values['ColumnName'] == column]
        # size is rows x columns; dividing by 3 presumably converts it to a
        # row count for the three-column summary output - TODO confirm.
        distinct_count = rows_for_column.size // 3
        print(row_template.format(column, distinct_count))

    return obj
352
+
353
def _futile_column(self,
                   categorical_obj):
    """
    DESCRIPTION:
        Internal function detects the futile columns with GetFutileColumns
        (threshold 0.7) and displays them, or displays a note when none
        are found.

    PARAMETERS:
        categorical_obj:
            Required Argument.
            Specifies the instance of CategoricalSummary for futile column detection.
            Types: Instance of CategoricalSummary

    RETURNS:
        None

    RAISES:
        None

    EXAMPLES:
        >>> self._futile_column(categorical_obj=cat_summary_obj)
    """
    # Futile columns detection using categorical column object
    detection = GetFutileColumns(data=self.data,
                                 object=categorical_obj,
                                 category_summary_column="ColumnName",
                                 threshold_value=0.7)

    # First field of every result row holds the futile column name.
    futile_names = [record[0] for record in detection.result.itertuples()]

    if futile_names:
        self._display_msg(msg='Futile columns in dataset:',
                          data=detection.result,
                          show_data=True)
        return

    self._display_msg(inline_msg='No Futile columns found.',
                      show_data=True)
390
+
391
def _target_column_details(self,
                           plot_data=None):
    """
    DESCRIPTION:
        Internal function displays the distribution of the target column /
        response column as a density histogram. Nothing is plotted when
        the visualization libraries are unavailable or when running in a
        terminal.

    PARAMETERS:
        plot_data:
            Optional Argument.
            Specifies already materialized data to plot from. When omitted,
            the target column is selected from the input teradataml
            DataFrame and converted to pandas.
            Types: DataFrame supporting [[column]] selection
                   (presumably a pandas DataFrame - TODO confirm)

    RETURNS:
        None

    RAISES:
        None

    EXAMPLES:
        >>> self._target_column_details(plot_data=df)
    """
    # Plotting needs the visualization libraries and a non-terminal session.
    if not self._check_visualization_libraries() or _is_terminal():
        return

    import matplotlib.pyplot as plt
    import seaborn as sns

    target_only = [self.target_column]
    if plot_data is not None:
        target_data = plot_data[target_only]
    else:
        target_data = self.data.select(target_only).to_pandas()

    self._display_msg(msg='Target Column Distribution:',
                      show_data=True)

    # Histogram of the target column, normalized to a density.
    plt.figure(figsize=(8, 6))
    plt.hist(target_data, bins=10, density=True, edgecolor='black')
    plt.xlabel(self.target_column)
    plt.ylabel('Density')
    plt.show()
427
+
428
def _countplot_categorical_distribution(self, plot_data, top_n=20, max_unique_threshold=50):
    """
    DESCRIPTION:
        Function to plot count plots for categorical features, split by the
        target column unless clustering is enabled.
        Limits the number of categories per plot to avoid messy visuals:
        only the "top_n" most frequent categories keep their own bar, and
        all remaining values are grouped into a single "Other" bar that is
        drawn last.

    PARAMETERS:
        plot_data:
            Required Argument.
            Specifies the pre-converted pandas DataFrame for plotting distribution.
            A copy is taken, so the caller's DataFrame is not modified.
            Types: pandas DataFrame

        top_n:
            Optional Argument.
            Maximum number of categories to display per feature
            (excluding the aggregated "Other" bar).
            Default Value: 20
            Types: int

        max_unique_threshold:
            Optional Argument.
            Only plot features whose number of unique values does not
            exceed this threshold.
            Default Value: 50
            Types: int

    RETURNS:
        None

    RAISES:
        None

    EXAMPLES:
        >>> self._countplot_categorical_distribution(plot_data=df, top_n=15)
    """
    # Use the pre-converted pandas data; copy because columns are rewritten below.
    data = plot_data.copy()

    target_column = self.target_column

    # Select categorical features
    categorical_features = data.select_dtypes(include=['object', 'category']).columns

    # The target is used as hue, not as a feature, outside clustering.
    if not self.cluster:
        categorical_features = [col for col in categorical_features if col != target_column]

    # Filter categorical features based on unique value threshold
    categorical_features = [col for col in categorical_features
                            if data[col].nunique() <= max_unique_threshold]

    if len(categorical_features) == 0:
        self._display_msg(msg="No categorical columns found with unique values within the threshold.")
        return

    self._display_msg(msg='Categorical Feature Distributions by Target Column (Count Plots):',
                      show_data=False)

    for feature in categorical_features:
        plt.figure(figsize=(10, 6))

        # Most frequent categories, most frequent first.
        top_categories = data[feature].value_counts().nlargest(top_n).index.tolist()
        # Set gives constant-time membership tests for the per-row lookup.
        top_lookup = set(top_categories)

        # Replace less frequent categories with "Other"
        data[feature] = data[feature].apply(lambda x: x if x in top_lookup else "Other")

        # Include the aggregated bucket in the plot order; without it the
        # rows rewritten to "Other" would silently disappear from the plot.
        plot_order = list(top_categories)
        if "Other" not in top_lookup and (data[feature] == "Other").any():
            plot_order.append("Other")

        # Generate count plot
        if not self.cluster:
            cntplot = sns.countplot(data=data, x=feature, hue=target_column, order=plot_order)
        else:
            cntplot = sns.countplot(data=data, x=feature, order=plot_order)

        # Annotate every non-empty bar with its count.
        for p in cntplot.patches:
            height = p.get_height()
            if height > 0:  # Only display if height is greater than 0
                cntplot.annotate(f'{int(height)}',
                                 (p.get_x() + p.get_width() / 2, height),
                                 ha='center', va='bottom', fontsize=10, fontweight='bold')

        if not self.cluster:
            plt.title(f"Distribution of {feature} by {target_column}")
        else:
            plt.title(f"Distribution of {feature}")
        plt.xlabel(feature)
        plt.ylabel("Count")
        plt.xticks(rotation=45, ha='right')  # Improve label visibility
        if not self.cluster:
            plt.legend(title=target_column)
        plt.tight_layout()
        plt.show()
522
+
523
+ def _correlation(self, data, threshold=0.1, max_features=10, min_features=2):
524
+ """
525
+ DESCRIPTION:
526
+ Function to calculate the correlation values between features.
527
+
528
+ PARAMETERS:
529
+ data:
530
+ Required Argument.
531
+ Specifies the input pandas DataFrame for correlation analysis.
532
+ Types: pandas DataFrame
533
+
534
+ threshold:
535
+ Optional Argument.
536
+ Specifies the minimum correlation threshold for feature selection.
537
+ Default Value: 0.1
538
+ Types: float
539
+
540
+ max_features:
541
+ Optional Argument.
542
+ Specifies the maximum number of features to select.
543
+ Default Value: 10
544
+ Types: int
545
+
546
+ min_features:
547
+ Optional Argument.
548
+ Specifies the minimum number of features to select as fallback.
549
+ Default Value: 2
550
+ Types: int
551
+
552
+ RETURNS:
553
+ tuple containing filtered correlations, selected features, correlation matrix, and selection criteria.
554
+
555
+ RAISES:
556
+ None
557
+
558
+ EXAMPLES:
559
+ >>> corr_result = self._correlation(data=df, threshold=0.2, max_features=8)
560
+ """
561
+ import numpy as np
562
+
563
+ numerical_features = data.select_dtypes(include=['float64', 'int64']).columns
564
+
565
+ # For AutoML, exclude target_column from numerical features
566
+ if not self.cluster and self.target_column in numerical_features:
567
+ numerical_features = [col for col in numerical_features if col != self.target_column]
568
+
569
+ total_numerical_features = len(numerical_features)
570
+
571
+ if self.cluster:
572
+ # Clustering: feature vs feature correlation
573
+ corr_matrix = data[numerical_features].corr()
574
+ # Extract upper triangle without diagonal
575
+ mask = np.triu(np.ones_like(corr_matrix, dtype=bool), k=1)
576
+ corr_vals = corr_matrix.where(mask).stack().reset_index()
577
+ corr_vals.columns = ['Feature1', 'Feature2', 'Correlation']
578
+ corr_vals['Abs_Correlation'] = corr_vals['Correlation'].abs()
579
+ corr_vals = corr_vals.sort_values(by='Abs_Correlation', ascending=False)
580
+
581
+ filtered = corr_vals[corr_vals['Abs_Correlation'] > threshold].head(max_features)
582
+ selection_criteria = "Top Correlated Feature Pairs"
583
+
584
+ if len(filtered) < 2:
585
+ filtered = corr_vals.head(min(2, len(corr_vals)))
586
+ selection_criteria = f"Top {min(2, len(corr_vals))} Correlated Feature Pairs (Fallback)"
587
+
588
+ # Merge unique features from pairs
589
+ selected_features = list(set(filtered['Feature1'].tolist() + filtered['Feature2'].tolist()))
590
+ selected_features = selected_features[:max_features] # restrict total features
591
+ corr_matrix = data[selected_features].corr()
592
+
593
+ return filtered, selected_features, corr_matrix, selection_criteria
594
+ else:
595
+ # AutoML: correlation with target column
596
+ correlation_values = data[numerical_features].corrwith(data[self.target_column])
597
+ correlation_df = correlation_values.reset_index()
598
+ correlation_df.columns = ['Feature', 'Correlation']
599
+ correlation_df['Abs_Correlation'] = correlation_df['Correlation'].abs()
600
+ correlation_df = correlation_df.sort_values(by='Abs_Correlation', ascending=False)
601
+
602
+ filtered = correlation_df[correlation_df['Abs_Correlation'] > threshold].head(max_features)
603
+ selection_criteria = "Features above threshold correlation with target"
604
+
605
+ if len(filtered) < 2:
606
+ filtered = correlation_df.head(min(min_features, total_numerical_features))
607
+ selection_criteria = f"Top {min(min_features, total_numerical_features)} Correlated Features (Fallback)"
608
+
609
+ selected_features = filtered['Feature'].tolist() + [self.target_column]
610
+ selected_features = list(dict.fromkeys(selected_features)) # preserve order, remove dup
611
+ corr_matrix = data[selected_features].corr()
612
+
613
+ return selected_features, corr_matrix, selection_criteria
614
+
615
def _boxplot_heatmap(self, plot_data):
    """
    DESCRIPTION:
        Internal function to display heatmap and boxplots of selected numerical features.
        Handles both AutoML (feature vs target) and Clustering (feature vs feature).
            * AutoML: one boxplot per selected feature, grouped by the target column.
              A string target is replaced by integer category codes first.
            * Clustering: one boxplot per correlated feature pair; an x-axis
              feature with more than 20 unique values is binned into up to 10 quantiles.
        Subplot slots that end up unused are hidden, and nothing is plotted
        when no features could be selected.

    PARAMETERS:
        plot_data:
            Required Argument.
            Specifies the pre-converted pandas DataFrame for plotting.
            A copy is taken, so the caller's DataFrame is not modified.
            Types: pandas DataFrame

    RETURNS:
        None

    RAISES:
        None

    EXAMPLES:
        >>> self._boxplot_heatmap(plot_data=df)
    """
    # Use the pre-converted pandas data
    data = plot_data.copy()

    # A string target cannot be correlated; convert it to numeric category codes.
    if not self.cluster and self.data_types.get(self.target_column) in ['str']:
        if data[self.target_column].dtype == 'object':
            data[self.target_column] = data[self.target_column].astype('category').cat.codes

    # _correlation() returns a 4-tuple for clustering and a 3-tuple otherwise.
    if self.cluster:
        filtered, selected_features, corr_matrix, selection_criteria = self._correlation(data=data)
    else:
        selected_features, corr_matrix, selection_criteria = self._correlation(data=data)

    # Nothing to draw; bail out instead of failing inside the plotting libraries.
    if corr_matrix.empty:
        self._display_msg(msg="No numerical features available for heatmap and boxplots.")
        return

    # Display heatmap; k=0 hides the diagonal and the upper triangle.
    mask = np.triu(np.ones_like(corr_matrix, dtype=bool), k=0)
    plt.figure(figsize=(8, 6))
    sns.heatmap(corr_matrix, mask=mask, annot=True, cmap="coolwarm", fmt=".2f", linewidths=0.5)
    plt.title("Heatmap of Selected Features")
    plt.show()

    num_features = len(selected_features)
    self._display_msg(msg=f'Number of features selected for Boxplots: {num_features}', show_data=False)
    self._display_msg(msg=f'Selection Criteria: {selection_criteria}', show_data=False)
    self._display_msg(msg=f'Selected Features: {", ".join(selected_features)}', show_data=False)
    self._display_msg(msg='Boxplots:', show_data=False)

    # Decide what is actually plotted, so the grid is sized for exactly that:
    # feature pairs for clustering, non-target features otherwise.
    if self.cluster:
        plot_items = [(row["Feature1"], row["Feature2"]) for _, row in filtered.iterrows()]
    else:
        plot_items = [feature for feature in selected_features if feature != self.target_column]

    num_plots = len(plot_items)
    if num_plots == 0:
        self._display_msg(msg="No features available for boxplots.")
        return

    cols = 2 if num_plots > 1 else 1
    rows = (num_plots + cols - 1) // cols

    # squeeze=False always yields a 2-D array, so flattening works for any grid size.
    fig, axes = plt.subplots(rows, cols, figsize=(12, rows * 4), squeeze=False)
    axes = axes.flatten()

    if self.cluster:
        for ax, (feature_x, feature_y) in zip(axes, plot_items):
            x = data[feature_x]
            # Bin high-cardinality x values so the boxplot stays readable.
            if x.nunique() > 20:
                x = pd.qcut(x, q=10, duplicates='drop')

            sns.boxplot(x=x, y=data[feature_y], ax=ax)
            ax.set_title(f"{feature_y} vs {feature_x}")
            ax.set_xlabel(feature_x)
            ax.set_ylabel(feature_y)
            ax.tick_params(axis='x', rotation=45)
    else:
        # AutoML: Plot boxplot of feature vs target column
        for ax, feature in zip(axes, plot_items):
            sns.boxplot(x=data[self.target_column], y=data[feature], ax=ax)
            ax.set_title(f"{feature}")
            ax.set_xlabel(self.target_column)
            ax.set_ylabel(feature)

    # Hide leftover slots of the grid (odd number of plots).
    for unused_ax in axes[num_plots:]:
        unused_ax.set_visible(False)

    plt.tight_layout()
    plt.show()
708
+
709
+ def _scatter_plot(self, plot_data, max_selected_pairs=10, threshold=0.1):
710
+ """
711
+ DESCRIPTION:
712
+ Internal function to display scatterplots of selected numerical features.
713
+ Handles Clustering (feature vs feature).
714
+
715
+ PARAMETERS:
716
+ plot_data:
717
+ Required Argument.
718
+ Specifies the pre-converted pandas DataFrame for plotting scatter plots.
719
+ This parameter is always provided by the main _exploration() method for performance optimization.
720
+ Types: pandas DataFrame
721
+
722
+ max_selected_pairs:
723
+ Optional Argument.
724
+ Specifies the maximum number of feature pairs to select for scatter plots.
725
+ Default Value: 10
726
+ Types: int
727
+
728
+ threshold:
729
+ Optional Argument.
730
+ Specifies the minimum correlation threshold for feature pair selection.
731
+ Default Value: 0.1
732
+ Types: float
733
+
734
+ RETURNS:
735
+ None
736
+
737
+ RAISES:
738
+ None
739
+
740
+ EXAMPLES:
741
+ >>> self._scatter_plot(plot_data=df, max_selected_pairs=8, threshold=0.15)
742
+ """
743
+ # Use the pre-converted pandas data
744
+ data = plot_data.copy()
745
+
746
+ # Select numerical features
747
+ numerical_features = data.select_dtypes(include=['float64', 'int64']).columns
748
+ if len(numerical_features) < 2:
749
+ self._display_msg(msg="Not enough numerical features for scatter plots.")
750
+ return
751
+
752
+ # Compute correlation matrix
753
+ corr_matrix = data[numerical_features].corr()
754
+
755
+ # Extract upper triangle (excluding diagonal)
756
+ mask = np.triu(np.ones_like(corr_matrix, dtype=bool), k=1)
757
+ corr_vals = corr_matrix.where(mask).stack().reset_index()
758
+ corr_vals.columns = ['Feature1', 'Feature2', 'Correlation']
759
+ corr_vals['Abs_Correlation'] = corr_vals['Correlation'].abs()
760
+
761
+ # Sort and filter top pairs
762
+ corr_vals = corr_vals.sort_values(by='Abs_Correlation', ascending=False)
763
+ filtered = corr_vals[corr_vals['Abs_Correlation'] > threshold].head(max_selected_pairs)
764
+
765
+ if len(filtered) < 2:
766
+ filtered = corr_vals.head(min(2, len(corr_vals)))
767
+
768
+ if len(filtered) == 0:
769
+ self._display_msg(msg="No correlated pairs found above threshold.")
770
+ return
771
+
772
+ self._display_msg(msg=f"Scatter Plots for Top Correlated Feature Pairs:", show_data=False)
773
+
774
+ # Plot scatter plots
775
+ for _, row in filtered.iterrows():
776
+ feature_x, feature_y = row["Feature1"], row["Feature2"]
777
+
778
+ plt.figure(figsize=(6, 4))
779
+ sns.scatterplot(x=data[feature_x], y=data[feature_y], alpha=0.3)
780
+ plt.xlabel(feature_x)
781
+ plt.ylabel(feature_y)
782
+ plt.title(f"Scatter Plot: {feature_x} vs {feature_y} (Corr: {row['Correlation']:.2f})")
783
+ plt.tight_layout()
784
+ plt.show()
785
+
786
def _ordinal_encoding(self,
                      ordinal_columns):
    """
    DESCRIPTION:
        Function performs ordinal encoding on the given categorical columns
        and replaces "self.data" with the transformed data. The fit result is
        recorded in "self.data_transform_dict", either as a feature encoding
        or, when the first column is the target column, as the target encoding.

    PARAMETERS:
        ordinal_columns:
            Required Argument.
            Specifies the categorical columns for which ordinal encoding will be performed.
            A single column name is treated as a one-element list.
            Types: str or list of strings (str)

    RETURNS:
        None

    RAISES:
        None

    EXAMPLES:
        >>> self._ordinal_encoding(ordinal_columns=["category1", "category2"])
    """
    # A bare column name is documented as valid input. Wrap it so that the
    # positional checks and the column difference below work on column names
    # rather than on the individual characters of the string.
    if isinstance(ordinal_columns, str):
        ordinal_columns = [ordinal_columns]

    # Setting volatile and persist parameters for performing encoding
    volatile, persist = self._get_generic_parameters(func_indicator="CategoricalEncodingIndicator",
                                                     param_name="CategoricalEncodingParam")

    # Adding fit parameters for performing encoding
    fit_params = {
        "data": self.data,
        "target_column": ordinal_columns,
        "volatile": volatile,
        "persist": persist
    }
    # Performing ordinal encoding fit on target columns
    ord_fit_obj = OrdinalEncodingFit(**fit_params)

    # Storing fit object and column list for ordinal encoding in data transform dictionary
    if ordinal_columns[0] != self.target_column:
        self.data_transform_dict["custom_ord_encoding_fit_obj"] = ord_fit_obj.result
        self.data_transform_dict['custom_ord_encoding_col'] = ordinal_columns
    else:
        self.data_transform_dict['target_col_encode_ind'] = True
        self.data_transform_dict['target_col_ord_encoding_fit_obj'] = ord_fit_obj.result

    # Every column that is not being encoded is carried through unchanged
    accumulate_columns = self._extract_list(self.data.columns, ordinal_columns)

    # Adding transform parameters for performing encoding
    transform_params = {
        "data": self.data,
        "object": ord_fit_obj.result,
        "accumulate": accumulate_columns,
        "persist": True
    }
    # The transform persists its output by default; when the caller asked for
    # neither volatile nor persist, the table name display is switched off.
    if not volatile and not persist:
        transform_params["display_table_name"] = False

    # Setting persist to False if volatile is True
    if volatile:
        transform_params["volatile"] = True
        transform_params["persist"] = False

    # Performing ordinal encoding transformation
    self.data = OrdinalEncodingTransform(**transform_params).result

    if not volatile and not persist:
        # Adding transformed data containing table to garbage collector
        GarbageCollector._add_to_garbagecollector(self.data._table_name)

    # Keep the fit object when only the target column was encoded
    if len(ordinal_columns) == 1 and ordinal_columns[0] == self.target_column:
        self.target_label = ord_fit_obj
854
+
855
+ def _extract_list(self,
856
+ list1,
857
+ list2):
858
+ """
859
+ DESCRIPTION:
860
+ Function to extract elements from list1 which are not present in list2.
861
+
862
+ PARAMETERS:
863
+ list1:
864
+ Required Argument.
865
+ Specifies the first list for extracting elements from.
866
+ Types: list
867
+
868
+ list2:
869
+ Required Argument.
870
+ Specifies the second list to get elements for avoiding in first list while extracting.
871
+ Types: list
872
+
873
+ RETURNS:
874
+ list containing extracted elements.
875
+
876
+ RAISES:
877
+ None
878
+
879
+ EXAMPLES:
880
+ >>> result = self._extract_list(list1=["a", "b", "c"], list2=["b"])
881
+ """
882
+ new_lst = list(set(list1) - set(list2))
883
+ return new_lst
884
+
885
+ def _get_generic_parameters(self,
886
+ func_indicator=None,
887
+ param_name=None):
888
+ """
889
+ DESCRIPTION:
890
+ Function to get generic parameters.
891
+
892
+ PARAMETERS:
893
+ func_indicator:
894
+ Optional Argument.
895
+ Specifies the name of function indicator.
896
+ Types: str
897
+
898
+ param_name:
899
+ Optional Argument.
900
+ Specifies the name of the param which contains generic parameters.
901
+ Types: str
902
+
903
+ RETURNS:
904
+ Tuple containing volatile and persist parameters.
905
+
906
+ RAISES:
907
+ None
908
+
909
+ EXAMPLES:
910
+ >>> volatile, persist = self._get_generic_parameters(func_indicator="CategoricalEncodingIndicator",
911
+ param_name="CategoricalEncodingParam")
912
+ """
913
+ volatile = self.volatile
914
+ persist = self.persist
915
+ if self.custom_data is not None and self.custom_data.get(func_indicator, False):
916
+ volatile = self.custom_data[param_name].get("volatile", False)
917
+ persist = self.custom_data[param_name].get("persist", False)
918
+
919
+ return (volatile, persist)
920
+
921
+ def _check_visualization_libraries(self):
922
+ """
923
+ DESCRIPTION:
924
+ Internal function Checks the availability of data visualization libraries.
925
+
926
+ PARAMETERS:
927
+ None
928
+
929
+ RETURNS:
930
+ bool
931
+
932
+ RAISES:
933
+ None
934
+
935
+ EXAMPLES:
936
+ >>> has_libs = self._check_visualization_libraries()
937
+ """
938
+
939
+ # Conditional import
940
+ try:
941
+ import matplotlib.pyplot as plt
942
+ import seaborn as sns
943
+ except ImportError:
944
+ print("Install seaborn and matplotlib libraries to visualize the data.")
945
+ return False
946
+
947
+ return True
948
+
949
def _outlier_detection(self,
                       outlier_method,
                       column_list,
                       lower_percentile=None,
                       upper_percentile=None):
    """
    DESCRIPTION:
        Function detects outliers in numerical columns and displays their
        percentage. Outliers are replaced with NULL through an outlier
        fit/transform, and the percentage is derived from the NULL and
        non-NULL counts reported by ColumnSummary on the transformed data.
        The target column is never analysed.

    PARAMETERS:
        outlier_method:
            Required Argument.
            Specifies the outlier method required for outlier detection.
            Types: str

        column_list:
            Required Argument.
            Specifies the numeric columns for outlier percentage calculation.
            Types: str or list of strings (str)

        lower_percentile:
            Optional Argument.
            Specifies the lower percentile value for outlier detection in case of percentile method.
            Types: float

        upper_percentile:
            Optional Argument.
            Specifies the upper percentile value for outlier detection in case of percentile method.
            Types: float

    RETURNS:
        DataFrame derived from the ColumnSummary result, holding the column
        name and outlier percentage of columns whose percentage is above zero.

    RAISES:
        None

    EXAMPLES:
        >>> outlier_df = self._outlier_detection(outlier_method="Tukey", column_list=["num1", "num2"])
    """
    # The target column is excluded from outlier analysis.
    feature_columns = [name for name in column_list if name != self.target_column]

    # Fit: learn the outlier bounds, with outliers to be replaced by NULL.
    outlier_fit = OutlierFilterFit(data=self.data,
                                   target_columns=feature_columns,
                                   outlier_method=outlier_method,
                                   lower_percentile=lower_percentile,
                                   upper_percentile=upper_percentile,
                                   replacement_value='NULL')

    # Transform: apply the fitted bounds to the data.
    outlier_transform = OutlierFilterTransform(data=self.data,
                                               object=outlier_fit.result)

    # Summarise the transformed columns to obtain NULL / non-NULL counts.
    summary = ColumnSummary(data=outlier_transform.result,
                            target_columns=feature_columns).result

    nulls = summary.NullCount
    non_nulls = summary.NonNullCount

    # Share of NULLs among all values of each column, as a percentage.
    df = summary.assign(True,
                        ColumnName=summary.ColumnName,
                        OutlierPercentage=(nulls / (non_nulls + nulls)) * 100)

    # Only columns that actually contain outliers are reported.
    df = df[df['OutlierPercentage'] > 0]

    if self.verbose > 0:
        # Blank out the current console line before printing the report.
        print(" "*500, end='\r')
        if df.shape[0] > 0:
            self._display_msg(msg='Columns with outlier percentage :-',
                              show_data=True)
            print(df)
        else:
            self._display_msg(msg="No outlier found!")

    return df
1035
+
1036
+ def _common_style(self):
1037
+ """
1038
+ DESCRIPTION:
1039
+ Internal Function sets the style tag for HTML.
1040
+
1041
+ PARAMETERS:
1042
+ None
1043
+
1044
+ RETURNS:
1045
+ str containing style tag.
1046
+
1047
+ RAISES:
1048
+ None
1049
+
1050
+ EXAMPLES:
1051
+ >>> style_str = self._common_style()
1052
+ """
1053
+ style = '''
1054
+ <style>
1055
+ .custom-div {
1056
+ background-color: lightgray;
1057
+ color: #000000;
1058
+ padding: 10px;
1059
+ border-radius: 8px;
1060
+ box-shadow: 0 3px 4px rgba(0, 0, 0, 0.2);
1061
+ margin-bottom: 10px;
1062
+ text-align: center;
1063
+ }
1064
+ </style>
1065
+ '''
1066
+ return style
1067
+
1068
+ def _display_heading(self,
1069
+ phase=0,
1070
+ progress_bar=None,
1071
+ **kwargs):
1072
+ """
1073
+ DESCRIPTION:
1074
+ Internal function to print the phase of AutoML that
1075
+ completed in green color.
1076
+
1077
+ PARAMETERS:
1078
+ phase:
1079
+ Optional Argument.
1080
+ Specifies the phase of automl that completed.
1081
+ Default Value: 0
1082
+ Types: int
1083
+
1084
+ progress_bar:
1085
+ Optional Argument.
1086
+ Specifies the _ProgressBar object.
1087
+ Types: object (_ProgressBar)
1088
+
1089
+ **kwargs:
1090
+ Specifies the additional arguments for display heading.
1091
+ Types: dict
1092
+
1093
+ RETURNS:
1094
+ None
1095
+
1096
+ RAISES:
1097
+ None
1098
+
1099
+ EXAMPLES:
1100
+ >>> self._display_heading(phase=1)
1101
+ """
1102
+ phases = ["1. Feature Exploration ->", " 2. Feature Engineering ->",
1103
+ " 3. Data Preparation ->", " 4. Model Training & Evaluation"]
1104
+ # Phases of automl
1105
+ if kwargs.get('automl_phases', None) is not None:
1106
+ steps = kwargs.get('automl_phases')
1107
+ else:
1108
+ steps = phases
1109
+
1110
+ # Check verbose > 0
1111
+ if self.verbose > 0:
1112
+
1113
+ # Check if code is running in IPython enviornment
1114
+ if not self.terminal_print:
1115
+ # Highlightedt phases of automl
1116
+ highlighted_steps = "".join(steps[:phase])
1117
+
1118
+ # Unhighlighted phases of automl
1119
+ unhighlighted_steps = "".join(steps[phase:])
1120
+
1121
+ # Combining highlighted and unhighlighted phases
1122
+ msg = self.style + f'<br><div class="custom-div"><h3><span style="color: green;">{highlighted_steps}</span>{unhighlighted_steps}<center></h3></center></div>'
1123
+ # Displaying the msg
1124
+ if progress_bar is not None:
1125
+ progress_bar.update(msg=msg,
1126
+ progress=False,
1127
+ ipython=True)
1128
+ else:
1129
+ display(HTML(msg))
1130
+ else:
1131
+ try:
1132
+ # Try to import colorama if not already imported
1133
+ from colorama import Fore, Style, init
1134
+ # initalize the color package
1135
+ init()
1136
+
1137
+ # Highlight the phases of automl
1138
+ highlighted_steps = "".join([Fore.GREEN + Style.BRIGHT + step + Style.RESET_ALL for step in steps[:phase]])
1139
+
1140
+ # Unhighlighted the phases of automl
1141
+ unhighlighted_steps = "".join(steps[phase:])
1142
+
1143
+ # Combining highlighted and unhighlighted phases
1144
+ msg = f'{highlighted_steps}{unhighlighted_steps}'
1145
+
1146
+ except ImportError:
1147
+ msg = "".join(step for step in steps)
1148
+
1149
+ if progress_bar is not None:
1150
+ progress_bar.update(msg=msg,
1151
+ progress=False)
1152
+ else:
1153
+ print(msg)
1154
+
1155
+ def _display_msg(self,
1156
+ msg=None,
1157
+ progress_bar=None,
1158
+ inline_msg=None,
1159
+ data=None,
1160
+ col_lst=None,
1161
+ show_data=False):
1162
+ """
1163
+ DESCRIPTION:
1164
+ Internal Function to print statement according to
1165
+ environment.
1166
+
1167
+ PARAMETERS:
1168
+ msg:
1169
+ Optional Argument.
1170
+ Specifies the message to print.
1171
+ Types: str
1172
+
1173
+ progress_bar:
1174
+ Optional Argument.
1175
+ Specifies the _ProgressBar object.
1176
+ Types: object (_ProgressBar)
1177
+
1178
+ inline_msg:
1179
+ Optional Argument.
1180
+ Specifies the additional information to print.
1181
+ Types: str
1182
+
1183
+ data:
1184
+ Optional Argument.
1185
+ Specifies the teradataml dataframe to print.
1186
+ Types: teradataml DataFrame
1187
+
1188
+ col_lst:
1189
+ Optional Argument.
1190
+ Specifies the list of columns.
1191
+ Types: list of str/int/data.time
1192
+
1193
+ show_data:
1194
+ Optional Argument.
1195
+ Specifies whether to print msg/data when verbose<2.
1196
+ Default Value: False
1197
+ Types: bool
1198
+
1199
+ RETURNS:
1200
+ None
1201
+
1202
+ RAISES:
1203
+ None
1204
+
1205
+ EXAMPLES:
1206
+ >>> self._display_msg(msg="Processing data", show_data=True)
1207
+ """
1208
+ # If verbose level is set to 2
1209
+ if self.verbose == 2:
1210
+ # If a progress bar is provided
1211
+ if progress_bar:
1212
+ # If a message is provided
1213
+ if msg:
1214
+ progress_bar.clear_line()
1215
+ self._logger.info(f"{msg}")
1216
+ # Update the progress bar with the message and either the column list or data (if they are not None)
1217
+ # passing empty message to avoid duplication of message in progress bar
1218
+ progress_bar.update(msg="", data=col_lst if col_lst else data if data is not None else None,
1219
+ progress=False,
1220
+ ipython=not self.terminal_print)
1221
+ # Displaying shape of data
1222
+ if data is not None:
1223
+ progress_bar.update(msg=f'{data.shape[0]} rows X {data.shape[1]} columns',
1224
+ progress=False,
1225
+ ipython=not self.terminal_print)
1226
+ # If an inline message is provided instead
1227
+ elif inline_msg:
1228
+ progress_bar.clear_line()
1229
+ self._logger.info(f"{inline_msg}")
1230
+ # If no progress bar is provided
1231
+ else:
1232
+ # If a message is provided
1233
+ if msg:
1234
+ # Print the message
1235
+ self._logger.info(f"{msg}")
1236
+ # If a column list is provided
1237
+ if col_lst:
1238
+ # Print the column list
1239
+ self._logger.info(col_lst)
1240
+ # If data is provided instead
1241
+ elif data is not None:
1242
+ # Print the data if terminal_print is True, else display the data
1243
+ print(data) if self.terminal_print else display(data)
1244
+ # If an inline message is provided instead
1245
+ elif inline_msg:
1246
+ # Print the inline message
1247
+ self._logger.info(f'{inline_msg}')
1248
+ # Exit the function after handling verbose level 2
1249
+ return
1250
+
1251
+ # If verbose level is more than 0 and show_data is True
1252
+ if self.verbose > 0 and show_data:
1253
+ # If a progress bar and a message are provided
1254
+ if progress_bar and msg:
1255
+ progress_bar.clear_line()
1256
+ self._logger.info(f"{msg}")
1257
+ # Update the progress bar with the message and data (if data is not None)
1258
+ # passing empty message to avoid duplication of message in progress bar
1259
+ progress_bar.update(msg="", data=data if data is not None else None,
1260
+ progress=False, ipython=not self.terminal_print)
1261
+ # If no progress bar is provided
1262
+ else:
1263
+ # If a message is provided
1264
+ if msg:
1265
+ # Print the message if terminal_print is True, else display the message
1266
+ self._logger.info(f'{msg}')
1267
+ # If data is provided
1268
+ if data is not None:
1269
+ # Print the data if terminal_print is True, else display the data
1270
+ print(data) if self.terminal_print else display(data)
1271
+
1272
@staticmethod
def _visualize(data,
               target_column,
               plot_type=["target"],
               length=10,
               breadth=8,
               max_features=10,
               columns=None,
               problem_type=None):
    """
    DESCRIPTION:
        Internal function to visualize the data using various plots such as heatmap,
        pair plot, density, count plot, box plot, and target distribution.
        Plots are always drawn in the order target, density, count, box,
        pair, heatmap, whatever order they were requested in. Plot types that
        do not apply to the data are reported through a printed message.

    PARAMETERS:
        data:
            Required Argument.
            Specifies the input teradataml DataFrame for plotting.
            Types: teradataml Dataframe

        target_column:
            Required Argument.
            Specifies the name of the target column in "data".
            Note:
                * A target that is not numeric is replaced by integer
                  category codes before plotting; a numeric target is
                  plotted with its original values.
            Types: str

        plot_type:
            Optional Argument.
            Specifies the type of plot to be displayed.
            Default Value: "target"
            Permitted Values:
                * "heatmap": Displays a heatmap of feature correlations.
                * "pair": Displays a pair plot of features.
                * "density": Displays a density plot of features.
                * "count": Displays a count plot of categorical features.
                * "box": Displays a box plot of numerical features.
                * "target": Displays the distribution of the target variable.
                * "all": Displays all the plots. It may be given on its own
                         or as an element of a list.
            Types: str, list of str

        length:
            Optional Argument.
            Specifies the length of the plot.
            Default Value: 10
            Types: int

        breadth:
            Optional Argument.
            Specifies the breadth of the plot.
            Default Value: 8
            Types: int

        max_features:
            Optional Argument.
            Specifies the maximum number of features to be used for plotting.
            Default Value: 10
            Note:
                * It applies separately to categorical and numerical features.
            Types: int

        columns:
            Optional Argument.
            Specifies the column names to be used for plotting. When omitted,
            the numerical features most correlated with the target and the
            first categorical features are used.
            Types: str or list of string

        problem_type:
            Optional Argument.
            Specifies the type of problem.
            Permitted Values:
                * 'regression'
                * 'classification'
            Types: str

    RETURNS:
        None

    RAISES:
        TeradataMlException, ValueError, TypeError

    EXAMPLES:
        >>> _FeatureExplore._visualize(data=data,
                                       target_column="target",
                                       plot_type="heatmap",
                                       length=10,
                                       breadth=8,
                                       max_features=10,
                                       columns=["feature1", "feature2"],
                                       problem_type="regression")
    """
    # NOTE: the list default of "plot_type" is never mutated in this function.

    # Appending arguments to list for validation
    arg_info_matrix = [
        ["data", data, False, (DataFrame)],
        ["target_column", target_column, False, (str)],
        ["plot_type", plot_type, True, (str, list), True, ["heatmap", "pair", "all",
                                                            "density", "count", "box", "target"]],
        ["length", length, True, (int)],
        ["breadth", breadth, True, (int)],
        ["max_features", max_features, True, (int)],
        ["problem_type", problem_type, True, (str), True, ["regression", "classification"]],
        ["columns", columns, True, (str, list)],
    ]

    # Validate argument types
    _Validators._validate_function_arguments(arg_info_matrix)

    # Validate that data has the required columns
    _Validators._validate_dataframe_has_argument_columns(target_column, "target_column", data, "data")
    _Validators._validate_dataframe_has_argument_columns(columns, "columns", data, "data")

    # Convert to pandas; reset_index() followed by re-selecting the original
    # columns keeps the index column out of the plotted data.
    cols = data.columns
    data = data.to_pandas().reset_index()
    data = data[cols]

    # Display order: univariate plots first, then bivariate, then multivariate.
    available_plots = ["target", "density", "count", "box", "pair", "heatmap"]

    # Encode the target as integer codes only when its values are not already
    # numeric. Encoding a numeric target would replace the real values with
    # their rank and distort the distribution and correlation plots.
    # Booleans are encoded too, so the target always ends up numeric.
    target_values = data[target_column]
    if not pd.api.types.is_numeric_dtype(target_values) or pd.api.types.is_bool_dtype(target_values):
        data[target_column] = target_values.astype("category").cat.codes

    # Normalise the request to a lower-case list. "all", alone or inside a
    # list, expands to every available plot.
    requested_plots = [plot.lower() for plot in UtilFuncs._as_list(plot_type)]
    if "all" in requested_plots:
        requested_plots = list(available_plots)

    # Identify numerical and categorical columns
    numerical_features = [col for col in data.select_dtypes(include=['number']).columns
                          if col != target_column]
    categorical_features = data.select_dtypes(include=['object', 'category']).columns.tolist()

    # Handle selected_columns input
    if columns:
        selected_columns = UtilFuncs._as_list(columns)
        selected_num_features = [col for col in selected_columns if col in numerical_features][:max_features]
        selected_cat_features = [col for col in selected_columns if col in categorical_features][:max_features]
    else:
        # Compute correlation with target and select top correlated numerical features
        if target_column in data.columns and pd.api.types.is_numeric_dtype(data[target_column]):
            selected_num_features = (
                data[numerical_features]
                .corrwith(data[target_column])
                .abs()
                .nlargest(max_features)
                .index.tolist()
            )
        else:
            selected_num_features = numerical_features[:max_features]

        # Select top categorical features based on appearance
        selected_cat_features = categorical_features[:max_features]

    # One renderer per plot type; each returns None on success or a message
    # explaining why the plot does not apply.
    renderers = {
        "target": lambda: _FeatureExplore._target_distribution(data=data,
                                                               target_column=target_column,
                                                               problem_type=problem_type,
                                                               length=length,
                                                               breadth=breadth),
        "density": lambda: _FeatureExplore._density_plot(data=data,
                                                         length=length,
                                                         breadth=breadth,
                                                         numerical_features=selected_num_features),
        "count": lambda: _FeatureExplore._count_plot(data=data,
                                                     length=length,
                                                     breadth=breadth,
                                                     categorical_features=selected_cat_features),
        "box": lambda: _FeatureExplore._box_plot(data=data,
                                                 length=length,
                                                 breadth=breadth,
                                                 numerical_features=selected_num_features),
        "pair": lambda: _FeatureExplore._pair_plot(data=data,
                                                   target_column=target_column,
                                                   length=length,
                                                   breadth=breadth,
                                                   numerical_features=selected_num_features,
                                                   categorical_features=selected_cat_features),
        "heatmap": lambda: _FeatureExplore._heatmap(data=data,
                                                    target_column=target_column,
                                                    length=length,
                                                    breadth=breadth,
                                                    numerical_features=selected_num_features),
    }

    irrelevant_plot = []
    for plot in sorted(requested_plots, key=available_plots.index):
        msg = renderers[plot]()
        if msg:
            irrelevant_plot.append(msg)

    # Report, after all plots, the plot types that could not be drawn.
    for msg in irrelevant_plot:
        print(msg)
1475
+
1476
+ @staticmethod
1477
+ def _heatmap(data,
1478
+ target_column,
1479
+ length=10,
1480
+ breadth=8,
1481
+ numerical_features=[]):
1482
+ """
1483
+ DESCRIPTION:
1484
+ Internal function to visualize the data using heatmap.
1485
+
1486
+ PARAMETERS:
1487
+ data:
1488
+ Required Argument.
1489
+ Specifies the input pandas DataFrame for plotting.
1490
+ Types: pandas Dataframe
1491
+
1492
+ target_column:
1493
+ Required Argument.
1494
+ Specifies the name of the target column in "data".
1495
+ Types: str
1496
+
1497
+ length:
1498
+ Optional Argument.
1499
+ Specifies the length of the plot.
1500
+ Default Value: 10
1501
+ Types: int
1502
+
1503
+ breadth:
1504
+ Optional Argument.
1505
+ Specifies the breadth of the plot.
1506
+ Default Value: 8
1507
+ Types: int
1508
+
1509
+ numerical_features:
1510
+ Optional Argument.
1511
+ Specifies the list of numerical features to be plotted.
1512
+ Types: list of str
1513
+
1514
+ RETURNS:
1515
+ str
1516
+
1517
+ RAISES:
1518
+ None
1519
+
1520
+ EXAMPLES:
1521
+ >>> _FeatureExplore._heatmap(data=data,
1522
+ target_column="target",
1523
+ length=10,
1524
+ breadth=8,
1525
+ numerical_features=["feature1", "feature2"])
1526
+
1527
+ """
1528
+ if len(numerical_features) >= 1:
1529
+ plt.figure(figsize=(length, breadth))
1530
+ sns.heatmap(data[numerical_features + [target_column]].corr(), annot=True, cmap="coolwarm")
1531
+ plt.title("Feature Correlation Heatmap")
1532
+ plt.show()
1533
+ else:
1534
+ return f"Plot type 'heatmap' is not applicable as no numerical features are available."
1535
+
1536
+ @staticmethod
1537
+ def _pair_plot(data,
1538
+ target_column,
1539
+ length=10,
1540
+ breadth=8,
1541
+ numerical_features=[],
1542
+ categorical_features=[]):
1543
+ """
1544
+ DESCRIPTION:
1545
+ Internal function to visualize the data using pair plot.
1546
+
1547
+ PARAMETERS:
1548
+ data:
1549
+ Required Argument.
1550
+ Specifies the input pandas DataFrame for plotting.
1551
+ Types: pandas Dataframe
1552
+
1553
+ target_column:
1554
+ Required Argument.
1555
+ Specifies the name of the target column in "data".
1556
+ Types: str
1557
+
1558
+ length:
1559
+ Optional Argument.
1560
+ Specifies the length of the plot.
1561
+ Default Value: 10
1562
+ Types: int
1563
+
1564
+ breadth:
1565
+ Optional Argument.
1566
+ Specifies the breadth of the plot.
1567
+ Default Value: 8
1568
+ Types: int
1569
+
1570
+ numerical_features:
1571
+ Optional Argument.
1572
+ Specifies the list of numerical features to be plotted.
1573
+ Types: list of str
1574
+
1575
+ categorical_features:
1576
+ Optional Argument.
1577
+ Specifies the list of categorical features to be plotted.
1578
+ Types: list of str
1579
+
1580
+ RETURNS:
1581
+ str
1582
+
1583
+ RAISES:
1584
+ None
1585
+
1586
+ EXAMPLES:
1587
+ >>> _FeatureExplore._pair_plot(data=data,
1588
+ target_column="target",
1589
+ length=10,
1590
+ breadth=8,
1591
+ numerical_features=["feature1", "feature2"])
1592
+
1593
+ """
1594
+ if len(numerical_features) >= 1:
1595
+ pair = sns.pairplot(data[numerical_features + [target_column]],
1596
+ hue=target_column if target_column in categorical_features else None)
1597
+
1598
+ # Add a centered title
1599
+ pair.figure.suptitle("pair Plot", fontsize=16, y=1.02)
1600
+ plt.show()
1601
+ else:
1602
+ return f"Plot type 'pair' is not applicable as no numerical features are available."
1603
+
1604
+ @staticmethod
1605
+ def _density_plot(data,
1606
+ length=10,
1607
+ breadth=8,
1608
+ numerical_features=[]):
1609
+ """
1610
+ DESCRIPTION:
1611
+ Internal function to visualize the data using density plot.
1612
+
1613
+ PARAMETERS:
1614
+ data:
1615
+ Required Argument.
1616
+ Specifies the input pandas DataFrame for plotting.
1617
+ Types: pandas Dataframe
1618
+
1619
+ length:
1620
+ Optional Argument.
1621
+ Specifies the length of the plot.
1622
+ Default Value: 10
1623
+ Types: int
1624
+
1625
+ breadth:
1626
+ Optional Argument.
1627
+ Specifies the breadth of the plot.
1628
+ Default Value: 8
1629
+ Types: int
1630
+
1631
+ numerical_features:
1632
+ Optional Argument.
1633
+ Specifies the list of numerical features to be plotted.
1634
+ Types: list of str
1635
+
1636
+ RETURNS:
1637
+ str
1638
+
1639
+ RAISES:
1640
+ None
1641
+
1642
+ EXAMPLES:
1643
+ >>> _FeatureExplore._density_plot(data=data,
1644
+ length=10,
1645
+ breadth=8,
1646
+ numerical_features=["feature1", "feature2"])
1647
+
1648
+ """
1649
+ if len(numerical_features) >= 1:
1650
+ rows = math.ceil(len(numerical_features) / 3)
1651
+ fig, axes = plt.subplots(rows, 3, figsize=(length, breadth))
1652
+ axes = axes.flatten()
1653
+ fig.suptitle("Density plot", fontsize=14)
1654
+
1655
+ for i, feature in enumerate(numerical_features):
1656
+ sns.kdeplot(data[feature], fill=True, color="green", alpha=0.6, ax=axes[i])
1657
+
1658
+ # Hide any empty subplots
1659
+ for i in range(len(numerical_features), len(axes)):
1660
+ axes[i].axis('off')
1661
+
1662
+ plt.tight_layout()
1663
+ plt.show()
1664
+ return None
1665
+ else:
1666
+ return f"Plot type 'density' is not applicable as no numerical features are available."
1667
+
1668
@staticmethod
def _target_distribution(data,
                         target_column,
                         problem_type=None,
                         length=10,
                         breadth=8):
    """
    DESCRIPTION:
        Function visualizes the target distribution. A count plot is drawn
        when "problem_type" is 'classification' (in any letter case), or
        when "problem_type" is None and the target has at most 20 distinct
        values. In every other case a histogram with a density curve is drawn.

    PARAMETERS:
        data:
            Required Argument.
            Specifies the input pandas DataFrame for plotting.
            Types: pandas Dataframe

        target_column:
            Required Argument.
            Specifies the name of the target column in "data".
            Types: str

        problem_type:
            Optional Argument.
            Specifies the type of problem.
            Permitted Values:
                * 'regression'
                * 'classification'
            Types: str

        length:
            Optional Argument.
            Specifies the length of the plot.
            Default Value: 10
            Types: int

        breadth:
            Optional Argument.
            Specifies the breadth of the plot.
            Default Value: 8
            Types: int

    RETURNS:
        None

    RAISES:
        None

    EXAMPLES:
        >>> _FeatureExplore._target_distribution(data=data, target_column="target", problem_type="classification")
    """
    plt.figure(figsize=(length, breadth))

    # Decide whether the target is treated as categorical.
    if problem_type is None:
        # No hint given: infer from the number of distinct target values.
        treat_as_categorical = data[target_column].nunique() <= 20
    else:
        treat_as_categorical = bool(problem_type) and problem_type.lower() == 'classification'

    if treat_as_categorical:
        sns.countplot(x=target_column,
                      data=data,
                      palette="coolwarm",
                      hue=target_column,
                      legend=False)
    else:
        sns.histplot(data[target_column], kde=True, color="blue")

    plt.title("Target Distribution")
    plt.tight_layout()
    plt.show()
1733
+
1734
+
1735
@staticmethod
def _count_plot(data,
                length=10,
                breadth=8,
                categorical_features=None):
    """
    DESCRIPTION:
        Internal function to visualize the data using count plot.
        Features are laid out on a grid with 3 columns and, for every
        feature, only the 25 most frequent categories are plotted.

    PARAMETERS:
        data:
            Required Argument.
            Specifies the input pandas DataFrame for plotting.
            Types: pandas DataFrame

        length:
            Optional Argument.
            Specifies the length (figure width) of the plot.
            Default Value: 10
            Types: int

        breadth:
            Optional Argument.
            Specifies the breadth of the plot.
            Note:
                This argument is currently not used by this function;
                the figure height is derived from the number of grid
                rows (5 per row).
            Default Value: 8
            Types: int

        categorical_features:
            Optional Argument.
            Specifies the list of categorical features to be plotted.
            None is treated the same as an empty list.
            Default Value: None
            Types: list of str

    RETURNS:
        str, a message when there are no categorical features to plot.
        None, when the plot is displayed.

    RAISES:
        None

    EXAMPLES:
        >>> _FeatureExplore._count_plot(data=data,
                                        length=10,
                                        breadth=8,
                                        categorical_features=["feature1", "feature2"])
    """
    # A None default is used instead of a list literal so that no mutable
    # object is shared between calls. len() is kept (rather than a
    # truthiness check) so that array-like inputs continue to work.
    if categorical_features is None or len(categorical_features) < 1:
        return "Plot type 'count' is not applicable as no categorical features are available."

    rows = math.ceil(len(categorical_features) / 3)
    fig, axes = plt.subplots(rows, 3, figsize=(length, rows * 5))
    # Flatten the grid so that axes can be addressed with a single index.
    axes = axes.flatten()
    fig.suptitle("Count plot", fontsize=14)

    for ax, feature in zip(axes, categorical_features):
        # Get the 25 most frequent categories.
        top_categories = data[feature].value_counts().nlargest(25)

        # Plot only those top categories.
        sns.barplot(x=top_categories.index,
                    y=top_categories.values,
                    hue=top_categories.index,
                    palette="coolwarm",
                    legend=False,
                    ax=ax)

        # Rotate labels for readability.
        ax.tick_params(axis='x', rotation=90)

    # Hide the grid cells that have no feature assigned to them.
    for ax in axes[len(categorical_features):]:
        ax.axis('off')

    # Adjust layout spacing.
    plt.subplots_adjust(hspace=1.5, wspace=0.3)
    plt.show()
1809
+
1810
@staticmethod
def _box_plot(data,
              length=10,
              breadth=8,
              numerical_features=None):
    """
    DESCRIPTION:
        Internal function to visualize the data using box plot.
        Features are laid out on a grid with 3 columns, one box plot
        per feature.

    PARAMETERS:
        data:
            Required Argument.
            Specifies the input pandas DataFrame for plotting.
            Types: pandas DataFrame

        length:
            Optional Argument.
            Specifies the length (figure width) of the plot.
            Default Value: 10
            Types: int

        breadth:
            Optional Argument.
            Specifies the breadth (figure height) of the plot.
            Default Value: 8
            Types: int

        numerical_features:
            Optional Argument.
            Specifies the list of numerical features to be plotted.
            None is treated the same as an empty list.
            Default Value: None
            Types: list of str

    RETURNS:
        str, a message when there are no numerical features to plot.
        None, when the plot is displayed.

    RAISES:
        None

    EXAMPLES:
        >>> _FeatureExplore._box_plot(data=data,
                                      length=10,
                                      breadth=8,
                                      numerical_features=["feature1", "feature2"])

    """
    # A None default is used instead of a list literal so that no mutable
    # object is shared between calls. len() is kept (rather than a
    # truthiness check) so that array-like inputs continue to work.
    if numerical_features is None or len(numerical_features) < 1:
        return "Plot type 'box' is not applicable as no numerical features are available."

    rows = math.ceil(len(numerical_features) / 3)
    fig, axes = plt.subplots(rows, 3, figsize=(length, breadth))
    # Flatten the grid so that axes can be addressed with a single index.
    axes = axes.flatten()
    fig.suptitle("Box plot", fontsize=14)

    for ax, feature in zip(axes, numerical_features):
        # No hue is used; the feature values are passed on the y axis.
        sns.boxplot(y=data[feature], data=data, ax=ax, legend=False)

    # Adjust layout to prevent label overlap. Done once after all the
    # plots are drawn instead of once per feature.
    plt.tight_layout()

    # Hide the grid cells that have no feature assigned to them.
    for ax in axes[len(numerical_features):]:
        ax.axis('off')

    plt.show()