teradataml-20.0.0.8-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
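For reference, the same file manifest can be reproduced locally by downloading the wheel from the public registry and listing its archive contents. A minimal sketch, assuming PyPI access and that the wheel filename follows the standard `{name}-{version}-{tags}.whl` convention (the filename below is an assumption, not taken from this diff):

```python
# Minimal sketch: download the wheel with pip and list its files.
# Assumes network access to PyPI; the wheel filename is inferred from
# the standard wheel naming convention.
import subprocess
import sys
import zipfile

subprocess.run(
    [sys.executable, "-m", "pip", "download", "teradataml==20.0.0.8",
     "--no-deps", "-d", "."],
    check=True,
)

# Wheels are ordinary zip archives, so the manifest can be read directly.
with zipfile.ZipFile("teradataml-20.0.0.8-py3-none-any.whl") as whl:
    for info in whl.infolist():
        print(f"{info.filename} ({info.file_size} bytes)")
```

Diffing two such listings (e.g., against the previous release's wheel) yields the per-file change counts shown below.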
Files changed (1208)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +2762 -0
  4. teradataml/__init__.py +78 -0
  5. teradataml/_version.py +11 -0
  6. teradataml/analytics/Transformations.py +2996 -0
  7. teradataml/analytics/__init__.py +82 -0
  8. teradataml/analytics/analytic_function_executor.py +2416 -0
  9. teradataml/analytics/analytic_query_generator.py +1050 -0
  10. teradataml/analytics/byom/H2OPredict.py +514 -0
  11. teradataml/analytics/byom/PMMLPredict.py +437 -0
  12. teradataml/analytics/byom/__init__.py +16 -0
  13. teradataml/analytics/json_parser/__init__.py +133 -0
  14. teradataml/analytics/json_parser/analytic_functions_argument.py +1805 -0
  15. teradataml/analytics/json_parser/json_store.py +191 -0
  16. teradataml/analytics/json_parser/metadata.py +1666 -0
  17. teradataml/analytics/json_parser/utils.py +805 -0
  18. teradataml/analytics/meta_class.py +236 -0
  19. teradataml/analytics/sqle/DecisionTreePredict.py +456 -0
  20. teradataml/analytics/sqle/NaiveBayesPredict.py +420 -0
  21. teradataml/analytics/sqle/__init__.py +128 -0
  22. teradataml/analytics/sqle/json/decisiontreepredict_sqle.json +78 -0
  23. teradataml/analytics/sqle/json/naivebayespredict_sqle.json +62 -0
  24. teradataml/analytics/table_operator/__init__.py +11 -0
  25. teradataml/analytics/uaf/__init__.py +82 -0
  26. teradataml/analytics/utils.py +828 -0
  27. teradataml/analytics/valib.py +1617 -0
  28. teradataml/automl/__init__.py +5835 -0
  29. teradataml/automl/autodataprep/__init__.py +493 -0
  30. teradataml/automl/custom_json_utils.py +1625 -0
  31. teradataml/automl/data_preparation.py +1384 -0
  32. teradataml/automl/data_transformation.py +1254 -0
  33. teradataml/automl/feature_engineering.py +2273 -0
  34. teradataml/automl/feature_exploration.py +1873 -0
  35. teradataml/automl/model_evaluation.py +488 -0
  36. teradataml/automl/model_training.py +1407 -0
  37. teradataml/catalog/__init__.py +2 -0
  38. teradataml/catalog/byom.py +1759 -0
  39. teradataml/catalog/function_argument_mapper.py +859 -0
  40. teradataml/catalog/model_cataloging_utils.py +491 -0
  41. teradataml/clients/__init__.py +0 -0
  42. teradataml/clients/auth_client.py +137 -0
  43. teradataml/clients/keycloak_client.py +165 -0
  44. teradataml/clients/pkce_client.py +481 -0
  45. teradataml/common/__init__.py +1 -0
  46. teradataml/common/aed_utils.py +2078 -0
  47. teradataml/common/bulk_exposed_utils.py +113 -0
  48. teradataml/common/constants.py +1669 -0
  49. teradataml/common/deprecations.py +166 -0
  50. teradataml/common/exceptions.py +147 -0
  51. teradataml/common/formula.py +743 -0
  52. teradataml/common/garbagecollector.py +666 -0
  53. teradataml/common/logger.py +1261 -0
  54. teradataml/common/messagecodes.py +518 -0
  55. teradataml/common/messages.py +262 -0
  56. teradataml/common/pylogger.py +67 -0
  57. teradataml/common/sqlbundle.py +764 -0
  58. teradataml/common/td_coltype_code_to_tdtype.py +48 -0
  59. teradataml/common/utils.py +3166 -0
  60. teradataml/common/warnings.py +36 -0
  61. teradataml/common/wrapper_utils.py +625 -0
  62. teradataml/config/__init__.py +0 -0
  63. teradataml/config/dummy_file1.cfg +5 -0
  64. teradataml/config/dummy_file2.cfg +3 -0
  65. teradataml/config/sqlengine_alias_definitions_v1.0 +14 -0
  66. teradataml/config/sqlengine_alias_definitions_v1.1 +20 -0
  67. teradataml/config/sqlengine_alias_definitions_v1.3 +19 -0
  68. teradataml/context/__init__.py +0 -0
  69. teradataml/context/aed_context.py +223 -0
  70. teradataml/context/context.py +1462 -0
  71. teradataml/data/A_loan.csv +19 -0
  72. teradataml/data/BINARY_REALS_LEFT.csv +11 -0
  73. teradataml/data/BINARY_REALS_RIGHT.csv +11 -0
  74. teradataml/data/B_loan.csv +49 -0
  75. teradataml/data/BuoyData2.csv +17 -0
  76. teradataml/data/CONVOLVE2_COMPLEX_LEFT.csv +5 -0
  77. teradataml/data/CONVOLVE2_COMPLEX_RIGHT.csv +5 -0
  78. teradataml/data/Convolve2RealsLeft.csv +5 -0
  79. teradataml/data/Convolve2RealsRight.csv +5 -0
  80. teradataml/data/Convolve2ValidLeft.csv +11 -0
  81. teradataml/data/Convolve2ValidRight.csv +11 -0
  82. teradataml/data/DFFTConv_Real_8_8.csv +65 -0
  83. teradataml/data/Employee.csv +5 -0
  84. teradataml/data/Employee_Address.csv +4 -0
  85. teradataml/data/Employee_roles.csv +5 -0
  86. teradataml/data/JulesBelvezeDummyData.csv +100 -0
  87. teradataml/data/Mall_customer_data.csv +201 -0
  88. teradataml/data/Orders1_12mf.csv +25 -0
  89. teradataml/data/Pi_loan.csv +7 -0
  90. teradataml/data/SMOOTHED_DATA.csv +7 -0
  91. teradataml/data/TestDFFT8.csv +9 -0
  92. teradataml/data/TestRiver.csv +109 -0
  93. teradataml/data/Traindata.csv +28 -0
  94. teradataml/data/__init__.py +0 -0
  95. teradataml/data/acf.csv +17 -0
  96. teradataml/data/adaboost_example.json +34 -0
  97. teradataml/data/adaboostpredict_example.json +24 -0
  98. teradataml/data/additional_table.csv +11 -0
  99. teradataml/data/admissions_test.csv +21 -0
  100. teradataml/data/admissions_train.csv +41 -0
  101. teradataml/data/admissions_train_nulls.csv +41 -0
  102. teradataml/data/advertising.csv +201 -0
  103. teradataml/data/ageandheight.csv +13 -0
  104. teradataml/data/ageandpressure.csv +31 -0
  105. teradataml/data/amazon_reviews_25.csv +26 -0
  106. teradataml/data/antiselect_example.json +36 -0
  107. teradataml/data/antiselect_input.csv +8 -0
  108. teradataml/data/antiselect_input_mixed_case.csv +8 -0
  109. teradataml/data/applicant_external.csv +7 -0
  110. teradataml/data/applicant_reference.csv +7 -0
  111. teradataml/data/apriori_example.json +22 -0
  112. teradataml/data/arima_example.json +9 -0
  113. teradataml/data/assortedtext_input.csv +8 -0
  114. teradataml/data/attribution_example.json +34 -0
  115. teradataml/data/attribution_sample_table.csv +27 -0
  116. teradataml/data/attribution_sample_table1.csv +6 -0
  117. teradataml/data/attribution_sample_table2.csv +11 -0
  118. teradataml/data/bank_churn.csv +10001 -0
  119. teradataml/data/bank_marketing.csv +11163 -0
  120. teradataml/data/bank_web_clicks1.csv +43 -0
  121. teradataml/data/bank_web_clicks2.csv +91 -0
  122. teradataml/data/bank_web_url.csv +85 -0
  123. teradataml/data/barrier.csv +2 -0
  124. teradataml/data/barrier_new.csv +3 -0
  125. teradataml/data/betweenness_example.json +14 -0
  126. teradataml/data/bike_sharing.csv +732 -0
  127. teradataml/data/bin_breaks.csv +8 -0
  128. teradataml/data/bin_fit_ip.csv +4 -0
  129. teradataml/data/binary_complex_left.csv +11 -0
  130. teradataml/data/binary_complex_right.csv +11 -0
  131. teradataml/data/binary_matrix_complex_left.csv +21 -0
  132. teradataml/data/binary_matrix_complex_right.csv +21 -0
  133. teradataml/data/binary_matrix_real_left.csv +21 -0
  134. teradataml/data/binary_matrix_real_right.csv +21 -0
  135. teradataml/data/blood2ageandweight.csv +26 -0
  136. teradataml/data/bmi.csv +501 -0
  137. teradataml/data/boston.csv +507 -0
  138. teradataml/data/boston2cols.csv +721 -0
  139. teradataml/data/breast_cancer.csv +570 -0
  140. teradataml/data/buoydata_mix.csv +11 -0
  141. teradataml/data/burst_data.csv +5 -0
  142. teradataml/data/burst_example.json +21 -0
  143. teradataml/data/byom_example.json +34 -0
  144. teradataml/data/bytes_table.csv +4 -0
  145. teradataml/data/cal_housing_ex_raw.csv +70 -0
  146. teradataml/data/callers.csv +7 -0
  147. teradataml/data/calls.csv +10 -0
  148. teradataml/data/cars_hist.csv +33 -0
  149. teradataml/data/cat_table.csv +25 -0
  150. teradataml/data/ccm_example.json +32 -0
  151. teradataml/data/ccm_input.csv +91 -0
  152. teradataml/data/ccm_input2.csv +13 -0
  153. teradataml/data/ccmexample.csv +101 -0
  154. teradataml/data/ccmprepare_example.json +9 -0
  155. teradataml/data/ccmprepare_input.csv +91 -0
  156. teradataml/data/cfilter_example.json +12 -0
  157. teradataml/data/changepointdetection_example.json +18 -0
  158. teradataml/data/changepointdetectionrt_example.json +8 -0
  159. teradataml/data/chi_sq.csv +3 -0
  160. teradataml/data/churn_data.csv +14 -0
  161. teradataml/data/churn_emission.csv +35 -0
  162. teradataml/data/churn_initial.csv +3 -0
  163. teradataml/data/churn_state_transition.csv +5 -0
  164. teradataml/data/citedges_2.csv +745 -0
  165. teradataml/data/citvertices_2.csv +1210 -0
  166. teradataml/data/clicks2.csv +16 -0
  167. teradataml/data/clickstream.csv +13 -0
  168. teradataml/data/clickstream1.csv +11 -0
  169. teradataml/data/closeness_example.json +16 -0
  170. teradataml/data/complaints.csv +21 -0
  171. teradataml/data/complaints_mini.csv +3 -0
  172. teradataml/data/complaints_test_tokenized.csv +353 -0
  173. teradataml/data/complaints_testtoken.csv +224 -0
  174. teradataml/data/complaints_tokens_model.csv +348 -0
  175. teradataml/data/complaints_tokens_test.csv +353 -0
  176. teradataml/data/complaints_traintoken.csv +472 -0
  177. teradataml/data/computers_category.csv +1001 -0
  178. teradataml/data/computers_test1.csv +1252 -0
  179. teradataml/data/computers_train1.csv +5009 -0
  180. teradataml/data/computers_train1_clustered.csv +5009 -0
  181. teradataml/data/confusionmatrix_example.json +9 -0
  182. teradataml/data/conversion_event_table.csv +3 -0
  183. teradataml/data/corr_input.csv +17 -0
  184. teradataml/data/correlation_example.json +11 -0
  185. teradataml/data/covid_confirm_sd.csv +83 -0
  186. teradataml/data/coxhazardratio_example.json +39 -0
  187. teradataml/data/coxph_example.json +15 -0
  188. teradataml/data/coxsurvival_example.json +28 -0
  189. teradataml/data/cpt.csv +41 -0
  190. teradataml/data/credit_ex_merged.csv +45 -0
  191. teradataml/data/creditcard_data.csv +1001 -0
  192. teradataml/data/customer_loyalty.csv +301 -0
  193. teradataml/data/customer_loyalty_newseq.csv +31 -0
  194. teradataml/data/customer_segmentation_test.csv +2628 -0
  195. teradataml/data/customer_segmentation_train.csv +8069 -0
  196. teradataml/data/dataframe_example.json +173 -0
  197. teradataml/data/decisionforest_example.json +37 -0
  198. teradataml/data/decisionforestpredict_example.json +38 -0
  199. teradataml/data/decisiontree_example.json +21 -0
  200. teradataml/data/decisiontreepredict_example.json +45 -0
  201. teradataml/data/dfft2_size4_real.csv +17 -0
  202. teradataml/data/dfft2_test_matrix16.csv +17 -0
  203. teradataml/data/dfft2conv_real_4_4.csv +65 -0
  204. teradataml/data/diabetes.csv +443 -0
  205. teradataml/data/diabetes_test.csv +89 -0
  206. teradataml/data/dict_table.csv +5 -0
  207. teradataml/data/docperterm_table.csv +4 -0
  208. teradataml/data/docs/__init__.py +1 -0
  209. teradataml/data/docs/byom/__init__.py +0 -0
  210. teradataml/data/docs/byom/docs/DataRobotPredict.py +180 -0
  211. teradataml/data/docs/byom/docs/DataikuPredict.py +217 -0
  212. teradataml/data/docs/byom/docs/H2OPredict.py +325 -0
  213. teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
  214. teradataml/data/docs/byom/docs/ONNXPredict.py +283 -0
  215. teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
  216. teradataml/data/docs/byom/docs/PMMLPredict.py +278 -0
  217. teradataml/data/docs/byom/docs/__init__.py +0 -0
  218. teradataml/data/docs/sqle/__init__.py +0 -0
  219. teradataml/data/docs/sqle/docs_17_10/Antiselect.py +83 -0
  220. teradataml/data/docs/sqle/docs_17_10/Attribution.py +200 -0
  221. teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +172 -0
  222. teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +131 -0
  223. teradataml/data/docs/sqle/docs_17_10/CategoricalSummary.py +86 -0
  224. teradataml/data/docs/sqle/docs_17_10/ChiSq.py +90 -0
  225. teradataml/data/docs/sqle/docs_17_10/ColumnSummary.py +86 -0
  226. teradataml/data/docs/sqle/docs_17_10/ConvertTo.py +96 -0
  227. teradataml/data/docs/sqle/docs_17_10/DecisionForestPredict.py +139 -0
  228. teradataml/data/docs/sqle/docs_17_10/DecisionTreePredict.py +152 -0
  229. teradataml/data/docs/sqle/docs_17_10/FTest.py +161 -0
  230. teradataml/data/docs/sqle/docs_17_10/FillRowId.py +83 -0
  231. teradataml/data/docs/sqle/docs_17_10/Fit.py +88 -0
  232. teradataml/data/docs/sqle/docs_17_10/GLMPredict.py +144 -0
  233. teradataml/data/docs/sqle/docs_17_10/GetRowsWithMissingValues.py +85 -0
  234. teradataml/data/docs/sqle/docs_17_10/GetRowsWithoutMissingValues.py +82 -0
  235. teradataml/data/docs/sqle/docs_17_10/Histogram.py +165 -0
  236. teradataml/data/docs/sqle/docs_17_10/MovingAverage.py +134 -0
  237. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +209 -0
  238. teradataml/data/docs/sqle/docs_17_10/NPath.py +266 -0
  239. teradataml/data/docs/sqle/docs_17_10/NaiveBayesPredict.py +116 -0
  240. teradataml/data/docs/sqle/docs_17_10/NaiveBayesTextClassifierPredict.py +176 -0
  241. teradataml/data/docs/sqle/docs_17_10/NumApply.py +147 -0
  242. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +135 -0
  243. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +109 -0
  244. teradataml/data/docs/sqle/docs_17_10/OutlierFilterFit.py +166 -0
  245. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +105 -0
  246. teradataml/data/docs/sqle/docs_17_10/Pack.py +128 -0
  247. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesFit.py +112 -0
  248. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +102 -0
  249. teradataml/data/docs/sqle/docs_17_10/QQNorm.py +105 -0
  250. teradataml/data/docs/sqle/docs_17_10/RoundColumns.py +110 -0
  251. teradataml/data/docs/sqle/docs_17_10/RowNormalizeFit.py +118 -0
  252. teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +99 -0
  253. teradataml/data/docs/sqle/docs_17_10/SVMSparsePredict.py +153 -0
  254. teradataml/data/docs/sqle/docs_17_10/ScaleFit.py +197 -0
  255. teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +99 -0
  256. teradataml/data/docs/sqle/docs_17_10/Sessionize.py +114 -0
  257. teradataml/data/docs/sqle/docs_17_10/SimpleImputeFit.py +116 -0
  258. teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +98 -0
  259. teradataml/data/docs/sqle/docs_17_10/StrApply.py +187 -0
  260. teradataml/data/docs/sqle/docs_17_10/StringSimilarity.py +146 -0
  261. teradataml/data/docs/sqle/docs_17_10/Transform.py +105 -0
  262. teradataml/data/docs/sqle/docs_17_10/UnivariateStatistics.py +142 -0
  263. teradataml/data/docs/sqle/docs_17_10/Unpack.py +214 -0
  264. teradataml/data/docs/sqle/docs_17_10/WhichMax.py +83 -0
  265. teradataml/data/docs/sqle/docs_17_10/WhichMin.py +83 -0
  266. teradataml/data/docs/sqle/docs_17_10/ZTest.py +155 -0
  267. teradataml/data/docs/sqle/docs_17_10/__init__.py +0 -0
  268. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +186 -0
  269. teradataml/data/docs/sqle/docs_17_20/Antiselect.py +83 -0
  270. teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
  271. teradataml/data/docs/sqle/docs_17_20/Attribution.py +201 -0
  272. teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +172 -0
  273. teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +139 -0
  274. teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
  275. teradataml/data/docs/sqle/docs_17_20/CategoricalSummary.py +86 -0
  276. teradataml/data/docs/sqle/docs_17_20/ChiSq.py +90 -0
  277. teradataml/data/docs/sqle/docs_17_20/ClassificationEvaluator.py +166 -0
  278. teradataml/data/docs/sqle/docs_17_20/ColumnSummary.py +86 -0
  279. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +246 -0
  280. teradataml/data/docs/sqle/docs_17_20/ConvertTo.py +113 -0
  281. teradataml/data/docs/sqle/docs_17_20/DecisionForest.py +280 -0
  282. teradataml/data/docs/sqle/docs_17_20/DecisionForestPredict.py +144 -0
  283. teradataml/data/docs/sqle/docs_17_20/DecisionTreePredict.py +136 -0
  284. teradataml/data/docs/sqle/docs_17_20/FTest.py +240 -0
  285. teradataml/data/docs/sqle/docs_17_20/FillRowId.py +83 -0
  286. teradataml/data/docs/sqle/docs_17_20/Fit.py +88 -0
  287. teradataml/data/docs/sqle/docs_17_20/GLM.py +541 -0
  288. teradataml/data/docs/sqle/docs_17_20/GLMPerSegment.py +415 -0
  289. teradataml/data/docs/sqle/docs_17_20/GLMPredict.py +144 -0
  290. teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +233 -0
  291. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +125 -0
  292. teradataml/data/docs/sqle/docs_17_20/GetRowsWithMissingValues.py +109 -0
  293. teradataml/data/docs/sqle/docs_17_20/GetRowsWithoutMissingValues.py +106 -0
  294. teradataml/data/docs/sqle/docs_17_20/Histogram.py +224 -0
  295. teradataml/data/docs/sqle/docs_17_20/KMeans.py +251 -0
  296. teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +144 -0
  297. teradataml/data/docs/sqle/docs_17_20/KNN.py +215 -0
  298. teradataml/data/docs/sqle/docs_17_20/MovingAverage.py +134 -0
  299. teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
  300. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +209 -0
  301. teradataml/data/docs/sqle/docs_17_20/NPath.py +266 -0
  302. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
  303. teradataml/data/docs/sqle/docs_17_20/NaiveBayesPredict.py +116 -0
  304. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +177 -0
  305. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +127 -0
  306. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +119 -0
  307. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +112 -0
  308. teradataml/data/docs/sqle/docs_17_20/NumApply.py +147 -0
  309. teradataml/data/docs/sqle/docs_17_20/OneClassSVM.py +307 -0
  310. teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +185 -0
  311. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +231 -0
  312. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +121 -0
  313. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingFit.py +220 -0
  314. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingTransform.py +127 -0
  315. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +191 -0
  316. teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +117 -0
  317. teradataml/data/docs/sqle/docs_17_20/Pack.py +128 -0
  318. teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
  319. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesFit.py +112 -0
  320. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +112 -0
  321. teradataml/data/docs/sqle/docs_17_20/QQNorm.py +105 -0
  322. teradataml/data/docs/sqle/docs_17_20/ROC.py +164 -0
  323. teradataml/data/docs/sqle/docs_17_20/RandomProjectionFit.py +155 -0
  324. teradataml/data/docs/sqle/docs_17_20/RandomProjectionMinComponents.py +106 -0
  325. teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +120 -0
  326. teradataml/data/docs/sqle/docs_17_20/RegressionEvaluator.py +211 -0
  327. teradataml/data/docs/sqle/docs_17_20/RoundColumns.py +109 -0
  328. teradataml/data/docs/sqle/docs_17_20/RowNormalizeFit.py +118 -0
  329. teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +111 -0
  330. teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
  331. teradataml/data/docs/sqle/docs_17_20/SVM.py +414 -0
  332. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +213 -0
  333. teradataml/data/docs/sqle/docs_17_20/SVMSparsePredict.py +153 -0
  334. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +315 -0
  335. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +202 -0
  336. teradataml/data/docs/sqle/docs_17_20/SentimentExtractor.py +206 -0
  337. teradataml/data/docs/sqle/docs_17_20/Sessionize.py +114 -0
  338. teradataml/data/docs/sqle/docs_17_20/Shap.py +225 -0
  339. teradataml/data/docs/sqle/docs_17_20/Silhouette.py +153 -0
  340. teradataml/data/docs/sqle/docs_17_20/SimpleImputeFit.py +116 -0
  341. teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +109 -0
  342. teradataml/data/docs/sqle/docs_17_20/StrApply.py +187 -0
  343. teradataml/data/docs/sqle/docs_17_20/StringSimilarity.py +146 -0
  344. teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +207 -0
  345. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +333 -0
  346. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
  347. teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
  348. teradataml/data/docs/sqle/docs_17_20/TargetEncodingFit.py +267 -0
  349. teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +141 -0
  350. teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
  351. teradataml/data/docs/sqle/docs_17_20/TextParser.py +224 -0
  352. teradataml/data/docs/sqle/docs_17_20/TrainTestSplit.py +160 -0
  353. teradataml/data/docs/sqle/docs_17_20/Transform.py +123 -0
  354. teradataml/data/docs/sqle/docs_17_20/UnivariateStatistics.py +142 -0
  355. teradataml/data/docs/sqle/docs_17_20/Unpack.py +214 -0
  356. teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
  357. teradataml/data/docs/sqle/docs_17_20/VectorDistance.py +169 -0
  358. teradataml/data/docs/sqle/docs_17_20/WhichMax.py +83 -0
  359. teradataml/data/docs/sqle/docs_17_20/WhichMin.py +83 -0
  360. teradataml/data/docs/sqle/docs_17_20/WordEmbeddings.py +237 -0
  361. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +362 -0
  362. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +281 -0
  363. teradataml/data/docs/sqle/docs_17_20/ZTest.py +220 -0
  364. teradataml/data/docs/sqle/docs_17_20/__init__.py +0 -0
  365. teradataml/data/docs/tableoperator/__init__.py +0 -0
  366. teradataml/data/docs/tableoperator/docs_17_00/ReadNOS.py +430 -0
  367. teradataml/data/docs/tableoperator/docs_17_00/__init__.py +0 -0
  368. teradataml/data/docs/tableoperator/docs_17_05/ReadNOS.py +430 -0
  369. teradataml/data/docs/tableoperator/docs_17_05/WriteNOS.py +348 -0
  370. teradataml/data/docs/tableoperator/docs_17_05/__init__.py +0 -0
  371. teradataml/data/docs/tableoperator/docs_17_10/ReadNOS.py +429 -0
  372. teradataml/data/docs/tableoperator/docs_17_10/WriteNOS.py +348 -0
  373. teradataml/data/docs/tableoperator/docs_17_10/__init__.py +0 -0
  374. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  375. teradataml/data/docs/tableoperator/docs_17_20/ReadNOS.py +440 -0
  376. teradataml/data/docs/tableoperator/docs_17_20/WriteNOS.py +387 -0
  377. teradataml/data/docs/tableoperator/docs_17_20/__init__.py +0 -0
  378. teradataml/data/docs/uaf/__init__.py +0 -0
  379. teradataml/data/docs/uaf/docs_17_20/ACF.py +186 -0
  380. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +370 -0
  381. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +172 -0
  382. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +161 -0
  383. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
  384. teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
  385. teradataml/data/docs/uaf/docs_17_20/BinaryMatrixOp.py +248 -0
  386. teradataml/data/docs/uaf/docs_17_20/BinarySeriesOp.py +252 -0
  387. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +178 -0
  388. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +175 -0
  389. teradataml/data/docs/uaf/docs_17_20/Convolve.py +230 -0
  390. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +218 -0
  391. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  392. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +185 -0
  393. teradataml/data/docs/uaf/docs_17_20/DFFT.py +204 -0
  394. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +216 -0
  395. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +216 -0
  396. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +192 -0
  397. teradataml/data/docs/uaf/docs_17_20/DIFF.py +175 -0
  398. teradataml/data/docs/uaf/docs_17_20/DTW.py +180 -0
  399. teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
  400. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +217 -0
  401. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +142 -0
  402. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +184 -0
  403. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +185 -0
  404. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
  405. teradataml/data/docs/uaf/docs_17_20/FitMetrics.py +172 -0
  406. teradataml/data/docs/uaf/docs_17_20/GenseriesFormula.py +206 -0
  407. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +143 -0
  408. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +198 -0
  409. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +260 -0
  410. teradataml/data/docs/uaf/docs_17_20/IDFFT.py +165 -0
  411. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +191 -0
  412. teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
  413. teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
  414. teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
  415. teradataml/data/docs/uaf/docs_17_20/InputValidator.py +121 -0
  416. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +156 -0
  417. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +215 -0
  418. teradataml/data/docs/uaf/docs_17_20/MAMean.py +174 -0
  419. teradataml/data/docs/uaf/docs_17_20/MInfo.py +134 -0
  420. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
  421. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +145 -0
  422. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +191 -0
  423. teradataml/data/docs/uaf/docs_17_20/PACF.py +157 -0
  424. teradataml/data/docs/uaf/docs_17_20/Portman.py +217 -0
  425. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +203 -0
  426. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +155 -0
  427. teradataml/data/docs/uaf/docs_17_20/Resample.py +237 -0
  428. teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
  429. teradataml/data/docs/uaf/docs_17_20/SInfo.py +123 -0
  430. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +173 -0
  431. teradataml/data/docs/uaf/docs_17_20/SelectionCriteria.py +174 -0
  432. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +171 -0
  433. teradataml/data/docs/uaf/docs_17_20/SignifResidmean.py +164 -0
  434. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +180 -0
  435. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +208 -0
  436. teradataml/data/docs/uaf/docs_17_20/TrackingOp.py +151 -0
  437. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +171 -0
  438. teradataml/data/docs/uaf/docs_17_20/Unnormalize.py +202 -0
  439. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +171 -0
  440. teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
  441. teradataml/data/docs/uaf/docs_17_20/__init__.py +0 -0
  442. teradataml/data/dtw_example.json +18 -0
  443. teradataml/data/dtw_t1.csv +11 -0
  444. teradataml/data/dtw_t2.csv +4 -0
  445. teradataml/data/dwt2d_dataTable.csv +65 -0
  446. teradataml/data/dwt2d_example.json +16 -0
  447. teradataml/data/dwt_dataTable.csv +8 -0
  448. teradataml/data/dwt_example.json +15 -0
  449. teradataml/data/dwt_filterTable.csv +3 -0
  450. teradataml/data/dwt_filter_dim.csv +5 -0
  451. teradataml/data/emission.csv +9 -0
  452. teradataml/data/emp_table_by_dept.csv +19 -0
  453. teradataml/data/employee_info.csv +4 -0
  454. teradataml/data/employee_table.csv +6 -0
  455. teradataml/data/excluding_event_table.csv +2 -0
  456. teradataml/data/finance_data.csv +6 -0
  457. teradataml/data/finance_data2.csv +61 -0
  458. teradataml/data/finance_data3.csv +93 -0
  459. teradataml/data/finance_data4.csv +13 -0
  460. teradataml/data/fish.csv +160 -0
  461. teradataml/data/fm_blood2ageandweight.csv +26 -0
  462. teradataml/data/fmeasure_example.json +12 -0
  463. teradataml/data/followers_leaders.csv +10 -0
  464. teradataml/data/fpgrowth_example.json +12 -0
  465. teradataml/data/frequentpaths_example.json +29 -0
  466. teradataml/data/friends.csv +9 -0
  467. teradataml/data/fs_input.csv +33 -0
  468. teradataml/data/fs_input1.csv +33 -0
  469. teradataml/data/genData.csv +513 -0
  470. teradataml/data/geodataframe_example.json +40 -0
  471. teradataml/data/glass_types.csv +215 -0
  472. teradataml/data/glm_admissions_model.csv +12 -0
  473. teradataml/data/glm_example.json +56 -0
  474. teradataml/data/glml1l2_example.json +28 -0
  475. teradataml/data/glml1l2predict_example.json +54 -0
  476. teradataml/data/glmpredict_example.json +54 -0
  477. teradataml/data/gq_t1.csv +21 -0
  478. teradataml/data/grocery_transaction.csv +19 -0
  479. teradataml/data/hconvolve_complex_right.csv +5 -0
  480. teradataml/data/hconvolve_complex_rightmulti.csv +5 -0
  481. teradataml/data/histogram_example.json +12 -0
  482. teradataml/data/hmmdecoder_example.json +79 -0
  483. teradataml/data/hmmevaluator_example.json +25 -0
  484. teradataml/data/hmmsupervised_example.json +10 -0
  485. teradataml/data/hmmunsupervised_example.json +8 -0
  486. teradataml/data/hnsw_alter_data.csv +5 -0
  487. teradataml/data/hnsw_data.csv +10 -0
  488. teradataml/data/house_values.csv +12 -0
  489. teradataml/data/house_values2.csv +13 -0
  490. teradataml/data/housing_cat.csv +7 -0
  491. teradataml/data/housing_data.csv +9 -0
  492. teradataml/data/housing_test.csv +47 -0
  493. teradataml/data/housing_test_binary.csv +47 -0
  494. teradataml/data/housing_train.csv +493 -0
  495. teradataml/data/housing_train_attribute.csv +5 -0
  496. teradataml/data/housing_train_binary.csv +437 -0
  497. teradataml/data/housing_train_parameter.csv +2 -0
  498. teradataml/data/housing_train_response.csv +493 -0
  499. teradataml/data/housing_train_segment.csv +201 -0
  500. teradataml/data/ibm_stock.csv +370 -0
  501. teradataml/data/ibm_stock1.csv +370 -0
  502. teradataml/data/identitymatch_example.json +22 -0
  503. teradataml/data/idf_table.csv +4 -0
  504. teradataml/data/idwt2d_dataTable.csv +5 -0
  505. teradataml/data/idwt_dataTable.csv +8 -0
  506. teradataml/data/idwt_filterTable.csv +3 -0
  507. teradataml/data/impressions.csv +101 -0
  508. teradataml/data/inflation.csv +21 -0
  509. teradataml/data/initial.csv +3 -0
  510. teradataml/data/insect2Cols.csv +61 -0
  511. teradataml/data/insect_sprays.csv +13 -0
  512. teradataml/data/insurance.csv +1339 -0
  513. teradataml/data/interpolator_example.json +13 -0
  514. teradataml/data/interval_data.csv +5 -0
  515. teradataml/data/iris_altinput.csv +481 -0
  516. teradataml/data/iris_attribute_output.csv +8 -0
  517. teradataml/data/iris_attribute_test.csv +121 -0
  518. teradataml/data/iris_attribute_train.csv +481 -0
  519. teradataml/data/iris_category_expect_predict.csv +31 -0
  520. teradataml/data/iris_data.csv +151 -0
  521. teradataml/data/iris_input.csv +151 -0
  522. teradataml/data/iris_response_train.csv +121 -0
  523. teradataml/data/iris_test.csv +31 -0
  524. teradataml/data/iris_train.csv +121 -0
  525. teradataml/data/join_table1.csv +4 -0
  526. teradataml/data/join_table2.csv +4 -0
  527. teradataml/data/jsons/anly_function_name.json +7 -0
  528. teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
  529. teradataml/data/jsons/byom/dataikupredict.json +148 -0
  530. teradataml/data/jsons/byom/datarobotpredict.json +147 -0
  531. teradataml/data/jsons/byom/h2opredict.json +195 -0
  532. teradataml/data/jsons/byom/onnxembeddings.json +267 -0
  533. teradataml/data/jsons/byom/onnxpredict.json +187 -0
  534. teradataml/data/jsons/byom/pmmlpredict.json +147 -0
  535. teradataml/data/jsons/paired_functions.json +450 -0
  536. teradataml/data/jsons/sqle/16.20/Antiselect.json +56 -0
  537. teradataml/data/jsons/sqle/16.20/Attribution.json +249 -0
  538. teradataml/data/jsons/sqle/16.20/DecisionForestPredict.json +156 -0
  539. teradataml/data/jsons/sqle/16.20/DecisionTreePredict.json +170 -0
  540. teradataml/data/jsons/sqle/16.20/GLMPredict.json +122 -0
  541. teradataml/data/jsons/sqle/16.20/MovingAverage.json +367 -0
  542. teradataml/data/jsons/sqle/16.20/NGramSplitter.json +239 -0
  543. teradataml/data/jsons/sqle/16.20/NaiveBayesPredict.json +136 -0
  544. teradataml/data/jsons/sqle/16.20/NaiveBayesTextClassifierPredict.json +235 -0
  545. teradataml/data/jsons/sqle/16.20/Pack.json +98 -0
  546. teradataml/data/jsons/sqle/16.20/SVMSparsePredict.json +162 -0
  547. teradataml/data/jsons/sqle/16.20/Sessionize.json +105 -0
  548. teradataml/data/jsons/sqle/16.20/StringSimilarity.json +86 -0
  549. teradataml/data/jsons/sqle/16.20/Unpack.json +166 -0
  550. teradataml/data/jsons/sqle/16.20/nPath.json +269 -0
  551. teradataml/data/jsons/sqle/17.00/Antiselect.json +56 -0
  552. teradataml/data/jsons/sqle/17.00/Attribution.json +249 -0
  553. teradataml/data/jsons/sqle/17.00/DecisionForestPredict.json +156 -0
  554. teradataml/data/jsons/sqle/17.00/DecisionTreePredict.json +170 -0
  555. teradataml/data/jsons/sqle/17.00/GLMPredict.json +122 -0
  556. teradataml/data/jsons/sqle/17.00/MovingAverage.json +367 -0
  557. teradataml/data/jsons/sqle/17.00/NGramSplitter.json +239 -0
  558. teradataml/data/jsons/sqle/17.00/NaiveBayesPredict.json +136 -0
  559. teradataml/data/jsons/sqle/17.00/NaiveBayesTextClassifierPredict.json +235 -0
  560. teradataml/data/jsons/sqle/17.00/Pack.json +98 -0
  561. teradataml/data/jsons/sqle/17.00/SVMSparsePredict.json +162 -0
  562. teradataml/data/jsons/sqle/17.00/Sessionize.json +105 -0
  563. teradataml/data/jsons/sqle/17.00/StringSimilarity.json +86 -0
  564. teradataml/data/jsons/sqle/17.00/Unpack.json +166 -0
  565. teradataml/data/jsons/sqle/17.00/nPath.json +269 -0
  566. teradataml/data/jsons/sqle/17.05/Antiselect.json +56 -0
  567. teradataml/data/jsons/sqle/17.05/Attribution.json +249 -0
  568. teradataml/data/jsons/sqle/17.05/DecisionForestPredict.json +156 -0
  569. teradataml/data/jsons/sqle/17.05/DecisionTreePredict.json +170 -0
  570. teradataml/data/jsons/sqle/17.05/GLMPredict.json +122 -0
  571. teradataml/data/jsons/sqle/17.05/MovingAverage.json +367 -0
  572. teradataml/data/jsons/sqle/17.05/NGramSplitter.json +239 -0
  573. teradataml/data/jsons/sqle/17.05/NaiveBayesPredict.json +136 -0
  574. teradataml/data/jsons/sqle/17.05/NaiveBayesTextClassifierPredict.json +235 -0
  575. teradataml/data/jsons/sqle/17.05/Pack.json +98 -0
  576. teradataml/data/jsons/sqle/17.05/SVMSparsePredict.json +162 -0
  577. teradataml/data/jsons/sqle/17.05/Sessionize.json +105 -0
  578. teradataml/data/jsons/sqle/17.05/StringSimilarity.json +86 -0
  579. teradataml/data/jsons/sqle/17.05/Unpack.json +166 -0
  580. teradataml/data/jsons/sqle/17.05/nPath.json +269 -0
  581. teradataml/data/jsons/sqle/17.10/Antiselect.json +56 -0
  582. teradataml/data/jsons/sqle/17.10/Attribution.json +249 -0
  583. teradataml/data/jsons/sqle/17.10/DecisionForestPredict.json +185 -0
  584. teradataml/data/jsons/sqle/17.10/DecisionTreePredict.json +172 -0
  585. teradataml/data/jsons/sqle/17.10/GLMPredict.json +151 -0
  586. teradataml/data/jsons/sqle/17.10/MovingAverage.json +368 -0
  587. teradataml/data/jsons/sqle/17.10/NGramSplitter.json +239 -0
  588. teradataml/data/jsons/sqle/17.10/NaiveBayesPredict.json +149 -0
  589. teradataml/data/jsons/sqle/17.10/NaiveBayesTextClassifierPredict.json +288 -0
  590. teradataml/data/jsons/sqle/17.10/Pack.json +133 -0
  591. teradataml/data/jsons/sqle/17.10/SVMSparsePredict.json +193 -0
  592. teradataml/data/jsons/sqle/17.10/Sessionize.json +105 -0
  593. teradataml/data/jsons/sqle/17.10/StringSimilarity.json +86 -0
  594. teradataml/data/jsons/sqle/17.10/TD_BinCodeFit.json +239 -0
  595. teradataml/data/jsons/sqle/17.10/TD_BinCodeTransform.json +70 -0
  596. teradataml/data/jsons/sqle/17.10/TD_CategoricalSummary.json +54 -0
  597. teradataml/data/jsons/sqle/17.10/TD_Chisq.json +68 -0
  598. teradataml/data/jsons/sqle/17.10/TD_ColumnSummary.json +54 -0
  599. teradataml/data/jsons/sqle/17.10/TD_ConvertTo.json +69 -0
  600. teradataml/data/jsons/sqle/17.10/TD_FTest.json +187 -0
  601. teradataml/data/jsons/sqle/17.10/TD_FillRowID.json +52 -0
  602. teradataml/data/jsons/sqle/17.10/TD_FunctionFit.json +46 -0
  603. teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +72 -0
  604. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithMissingValues.json +53 -0
  605. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithoutMissingValues.json +53 -0
  606. teradataml/data/jsons/sqle/17.10/TD_Histogram.json +133 -0
  607. teradataml/data/jsons/sqle/17.10/TD_NumApply.json +147 -0
  608. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingFit.json +183 -0
  609. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +66 -0
  610. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterFit.json +197 -0
  611. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +48 -0
  612. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesFit.json +114 -0
  613. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +72 -0
  614. teradataml/data/jsons/sqle/17.10/TD_QQNorm.json +112 -0
  615. teradataml/data/jsons/sqle/17.10/TD_RoundColumns.json +93 -0
  616. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeFit.json +128 -0
  617. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +71 -0
  618. teradataml/data/jsons/sqle/17.10/TD_ScaleFit.json +157 -0
  619. teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +71 -0
  620. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeFit.json +148 -0
  621. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +48 -0
  622. teradataml/data/jsons/sqle/17.10/TD_StrApply.json +240 -0
  623. teradataml/data/jsons/sqle/17.10/TD_UnivariateStatistics.json +119 -0
  624. teradataml/data/jsons/sqle/17.10/TD_WhichMax.json +53 -0
  625. teradataml/data/jsons/sqle/17.10/TD_WhichMin.json +53 -0
  626. teradataml/data/jsons/sqle/17.10/TD_ZTest.json +171 -0
  627. teradataml/data/jsons/sqle/17.10/Unpack.json +188 -0
  628. teradataml/data/jsons/sqle/17.10/nPath.json +269 -0
  629. teradataml/data/jsons/sqle/17.20/Antiselect.json +56 -0
  630. teradataml/data/jsons/sqle/17.20/Attribution.json +249 -0
  631. teradataml/data/jsons/sqle/17.20/DecisionForestPredict.json +185 -0
  632. teradataml/data/jsons/sqle/17.20/DecisionTreePredict.json +172 -0
  633. teradataml/data/jsons/sqle/17.20/GLMPredict.json +151 -0
  634. teradataml/data/jsons/sqle/17.20/MovingAverage.json +367 -0
  635. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +239 -0
  636. teradataml/data/jsons/sqle/17.20/NaiveBayesPredict.json +149 -0
  637. teradataml/data/jsons/sqle/17.20/NaiveBayesTextClassifierPredict.json +287 -0
  638. teradataml/data/jsons/sqle/17.20/Pack.json +133 -0
  639. teradataml/data/jsons/sqle/17.20/SVMSparsePredict.json +192 -0
  640. teradataml/data/jsons/sqle/17.20/Sessionize.json +105 -0
  641. teradataml/data/jsons/sqle/17.20/StringSimilarity.json +86 -0
  642. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +149 -0
  643. teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
  644. teradataml/data/jsons/sqle/17.20/TD_BinCodeFit.json +239 -0
  645. teradataml/data/jsons/sqle/17.20/TD_BinCodeTransform.json +71 -0
  646. teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
  647. teradataml/data/jsons/sqle/17.20/TD_CategoricalSummary.json +53 -0
  648. teradataml/data/jsons/sqle/17.20/TD_Chisq.json +68 -0
  649. teradataml/data/jsons/sqle/17.20/TD_ClassificationEvaluator.json +146 -0
  650. teradataml/data/jsons/sqle/17.20/TD_ColumnSummary.json +53 -0
  651. teradataml/data/jsons/sqle/17.20/TD_ColumnTransformer.json +218 -0
  652. teradataml/data/jsons/sqle/17.20/TD_ConvertTo.json +92 -0
  653. teradataml/data/jsons/sqle/17.20/TD_DecisionForest.json +260 -0
  654. teradataml/data/jsons/sqle/17.20/TD_DecisionForestPredict.json +139 -0
  655. teradataml/data/jsons/sqle/17.20/TD_FTest.json +269 -0
  656. teradataml/data/jsons/sqle/17.20/TD_FillRowID.json +52 -0
  657. teradataml/data/jsons/sqle/17.20/TD_FunctionFit.json +46 -0
  658. teradataml/data/jsons/sqle/17.20/TD_FunctionTransform.json +72 -0
  659. teradataml/data/jsons/sqle/17.20/TD_GLM.json +507 -0
  660. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +168 -0
  661. teradataml/data/jsons/sqle/17.20/TD_GLMPerSegment.json +411 -0
  662. teradataml/data/jsons/sqle/17.20/TD_GLMPredictPerSegment.json +146 -0
  663. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +93 -0
  664. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithMissingValues.json +76 -0
  665. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithoutMissingValues.json +76 -0
  666. teradataml/data/jsons/sqle/17.20/TD_Histogram.json +152 -0
  667. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +232 -0
  668. teradataml/data/jsons/sqle/17.20/TD_KMeansPredict.json +87 -0
  669. teradataml/data/jsons/sqle/17.20/TD_KNN.json +262 -0
  670. teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
  671. teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
  672. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
  673. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesTextClassifierTrainer.json +137 -0
  674. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +102 -0
  675. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineTransform.json +71 -0
  676. teradataml/data/jsons/sqle/17.20/TD_NumApply.json +147 -0
  677. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +316 -0
  678. teradataml/data/jsons/sqle/17.20/TD_OneClassSVMPredict.json +124 -0
  679. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingFit.json +271 -0
  680. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingTransform.json +65 -0
  681. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingFit.json +229 -0
  682. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingTransform.json +75 -0
  683. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterFit.json +217 -0
  684. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterTransform.json +48 -0
  685. teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
  686. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesFit.json +114 -0
  687. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesTransform.json +72 -0
  688. teradataml/data/jsons/sqle/17.20/TD_QQNorm.json +111 -0
  689. teradataml/data/jsons/sqle/17.20/TD_ROC.json +179 -0
  690. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionFit.json +179 -0
  691. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionMinComponents.json +74 -0
  692. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionTransform.json +74 -0
  693. teradataml/data/jsons/sqle/17.20/TD_RegressionEvaluator.json +138 -0
  694. teradataml/data/jsons/sqle/17.20/TD_RoundColumns.json +93 -0
  695. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeFit.json +128 -0
  696. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeTransform.json +71 -0
  697. teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
  698. teradataml/data/jsons/sqle/17.20/TD_SVM.json +389 -0
  699. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +142 -0
  700. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +310 -0
  701. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +120 -0
  702. teradataml/data/jsons/sqle/17.20/TD_SentimentExtractor.json +194 -0
  703. teradataml/data/jsons/sqle/17.20/TD_Shap.json +221 -0
  704. teradataml/data/jsons/sqle/17.20/TD_Silhouette.json +143 -0
  705. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeFit.json +147 -0
  706. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeTransform.json +48 -0
  707. teradataml/data/jsons/sqle/17.20/TD_StrApply.json +240 -0
  708. teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
  709. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingFit.json +248 -0
  710. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingTransform.json +75 -0
  711. teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
  712. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +297 -0
  713. teradataml/data/jsons/sqle/17.20/TD_TrainTestSplit.json +142 -0
  714. teradataml/data/jsons/sqle/17.20/TD_UnivariateStatistics.json +117 -0
  715. teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
  716. teradataml/data/jsons/sqle/17.20/TD_VectorDistance.json +183 -0
  717. teradataml/data/jsons/sqle/17.20/TD_WhichMax.json +53 -0
  718. teradataml/data/jsons/sqle/17.20/TD_WhichMin.json +53 -0
  719. teradataml/data/jsons/sqle/17.20/TD_WordEmbeddings.json +241 -0
  720. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +330 -0
  721. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +195 -0
  722. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +247 -0
  723. teradataml/data/jsons/sqle/17.20/Unpack.json +188 -0
  724. teradataml/data/jsons/sqle/17.20/nPath.json +269 -0
  725. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +370 -0
  726. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +460 -0
  727. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +385 -0
  728. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +369 -0
  729. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +369 -0
  730. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +369 -0
  731. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +369 -0
  732. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +400 -0
  733. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +401 -0
  734. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +384 -0
  735. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +384 -0
  736. teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
  737. teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
  738. teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
  739. teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
  740. teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
  741. teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
  742. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  743. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  744. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  745. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  746. teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
  747. teradataml/data/jsons/tableoperator/17.00/read_nos.json +198 -0
  748. teradataml/data/jsons/tableoperator/17.05/read_nos.json +198 -0
  749. teradataml/data/jsons/tableoperator/17.05/write_nos.json +195 -0
  750. teradataml/data/jsons/tableoperator/17.10/read_nos.json +184 -0
  751. teradataml/data/jsons/tableoperator/17.10/write_nos.json +195 -0
  752. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  753. teradataml/data/jsons/tableoperator/17.20/read_nos.json +183 -0
  754. teradataml/data/jsons/tableoperator/17.20/write_nos.json +224 -0
  755. teradataml/data/jsons/uaf/17.20/TD_ACF.json +132 -0
  756. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +396 -0
  757. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +77 -0
  758. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +153 -0
  759. teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
  760. teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
  761. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +107 -0
  762. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +106 -0
  763. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +89 -0
  764. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +104 -0
  765. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +78 -0
  766. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +66 -0
  767. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +87 -0
  768. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +134 -0
  769. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +144 -0
  770. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +108 -0
  771. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +108 -0
  772. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +78 -0
  773. teradataml/data/jsons/uaf/17.20/TD_DIFF.json +92 -0
  774. teradataml/data/jsons/uaf/17.20/TD_DTW.json +114 -0
  775. teradataml/data/jsons/uaf/17.20/TD_DURBIN_WATSON.json +101 -0
  776. teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
  777. teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
  778. teradataml/data/jsons/uaf/17.20/TD_EXTRACT_RESULTS.json +39 -0
  779. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +101 -0
  780. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4FORMULA.json +85 -0
  781. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4SINUSOIDS.json +71 -0
  782. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +139 -0
  783. teradataml/data/jsons/uaf/17.20/TD_HOLT_WINTERS_FORECASTER.json +313 -0
  784. teradataml/data/jsons/uaf/17.20/TD_IDFFT.json +58 -0
  785. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +81 -0
  786. teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
  787. teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
  788. teradataml/data/jsons/uaf/17.20/TD_INPUTVALIDATOR.json +64 -0
  789. teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
  790. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +182 -0
  791. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +103 -0
  792. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +181 -0
  793. teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
  794. teradataml/data/jsons/uaf/17.20/TD_MATRIXMULTIPLY.json +68 -0
  795. teradataml/data/jsons/uaf/17.20/TD_MINFO.json +67 -0
  796. teradataml/data/jsons/uaf/17.20/TD_MULTIVAR_REGR.json +179 -0
  797. teradataml/data/jsons/uaf/17.20/TD_PACF.json +114 -0
  798. teradataml/data/jsons/uaf/17.20/TD_PORTMAN.json +119 -0
  799. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +175 -0
  800. teradataml/data/jsons/uaf/17.20/TD_POWERTRANSFORM.json +98 -0
  801. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +194 -0
  802. teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
  803. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +143 -0
  804. teradataml/data/jsons/uaf/17.20/TD_SELECTION_CRITERIA.json +90 -0
  805. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_PERIODICITIES.json +80 -0
  806. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_RESIDMEAN.json +68 -0
  807. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +184 -0
  808. teradataml/data/jsons/uaf/17.20/TD_SINFO.json +58 -0
  809. teradataml/data/jsons/uaf/17.20/TD_SMOOTHMA.json +163 -0
  810. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +101 -0
  811. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +112 -0
  812. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +95 -0
  813. teradataml/data/jsons/uaf/17.20/TD_WHITES_GENERAL.json +78 -0
  814. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
  815. teradataml/data/kmeans_example.json +23 -0
  816. teradataml/data/kmeans_table.csv +10 -0
  817. teradataml/data/kmeans_us_arrests_data.csv +51 -0
  818. teradataml/data/knn_example.json +19 -0
  819. teradataml/data/knnrecommender_example.json +7 -0
  820. teradataml/data/knnrecommenderpredict_example.json +12 -0
  821. teradataml/data/lar_example.json +17 -0
  822. teradataml/data/larpredict_example.json +30 -0
  823. teradataml/data/lc_new_predictors.csv +5 -0
  824. teradataml/data/lc_new_reference.csv +9 -0
  825. teradataml/data/lda_example.json +9 -0
  826. teradataml/data/ldainference_example.json +15 -0
  827. teradataml/data/ldatopicsummary_example.json +9 -0
  828. teradataml/data/levendist_input.csv +13 -0
  829. teradataml/data/levenshteindistance_example.json +10 -0
  830. teradataml/data/linreg_example.json +10 -0
  831. teradataml/data/load_example_data.py +350 -0
  832. teradataml/data/loan_prediction.csv +295 -0
  833. teradataml/data/lungcancer.csv +138 -0
  834. teradataml/data/mappingdata.csv +12 -0
  835. teradataml/data/medical_readings.csv +101 -0
  836. teradataml/data/milk_timeseries.csv +157 -0
  837. teradataml/data/min_max_titanic.csv +4 -0
  838. teradataml/data/minhash_example.json +6 -0
  839. teradataml/data/ml_ratings.csv +7547 -0
  840. teradataml/data/ml_ratings_10.csv +2445 -0
  841. teradataml/data/mobile_data.csv +13 -0
  842. teradataml/data/model1_table.csv +5 -0
  843. teradataml/data/model2_table.csv +5 -0
  844. teradataml/data/models/License_file.txt +1 -0
  845. teradataml/data/models/License_file_empty.txt +0 -0
  846. teradataml/data/models/dataiku_iris_data_ann_thin +0 -0
  847. teradataml/data/models/dr_iris_rf +0 -0
  848. teradataml/data/models/iris_db_dt_model_sklearn.onnx +0 -0
  849. teradataml/data/models/iris_db_dt_model_sklearn_floattensor.onnx +0 -0
  850. teradataml/data/models/iris_db_glm_model.pmml +57 -0
  851. teradataml/data/models/iris_db_xgb_model.pmml +4471 -0
  852. teradataml/data/models/iris_kmeans_model +0 -0
  853. teradataml/data/models/iris_mojo_glm_h2o_model +0 -0
  854. teradataml/data/models/iris_mojo_xgb_h2o_model +0 -0
  855. teradataml/data/modularity_example.json +12 -0
  856. teradataml/data/movavg_example.json +8 -0
  857. teradataml/data/mtx1.csv +7 -0
  858. teradataml/data/mtx2.csv +13 -0
  859. teradataml/data/multi_model_classification.csv +401 -0
  860. teradataml/data/multi_model_regression.csv +401 -0
  861. teradataml/data/mvdfft8.csv +9 -0
  862. teradataml/data/naivebayes_example.json +10 -0
  863. teradataml/data/naivebayespredict_example.json +19 -0
  864. teradataml/data/naivebayestextclassifier2_example.json +7 -0
  865. teradataml/data/naivebayestextclassifier_example.json +8 -0
  866. teradataml/data/naivebayestextclassifierpredict_example.json +32 -0
  867. teradataml/data/name_Find_configure.csv +10 -0
  868. teradataml/data/namedentityfinder_example.json +14 -0
  869. teradataml/data/namedentityfinderevaluator_example.json +10 -0
  870. teradataml/data/namedentityfindertrainer_example.json +6 -0
  871. teradataml/data/nb_iris_input_test.csv +31 -0
  872. teradataml/data/nb_iris_input_train.csv +121 -0
  873. teradataml/data/nbp_iris_model.csv +13 -0
  874. teradataml/data/ner_dict.csv +8 -0
  875. teradataml/data/ner_extractor_text.csv +2 -0
  876. teradataml/data/ner_input_eng.csv +7 -0
  877. teradataml/data/ner_rule.csv +5 -0
  878. teradataml/data/ner_sports_test2.csv +29 -0
  879. teradataml/data/ner_sports_train.csv +501 -0
  880. teradataml/data/nerevaluator_example.json +6 -0
  881. teradataml/data/nerextractor_example.json +18 -0
  882. teradataml/data/nermem_sports_test.csv +18 -0
  883. teradataml/data/nermem_sports_train.csv +51 -0
  884. teradataml/data/nertrainer_example.json +7 -0
  885. teradataml/data/ngrams_example.json +7 -0
  886. teradataml/data/notebooks/__init__.py +0 -0
  887. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Aggregate Functions using SQLAlchemy.ipynb +1455 -0
  888. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Arithmetic Functions Using SQLAlchemy.ipynb +1993 -0
  889. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Bit-Byte Manipulation Functions using SQLAlchemy.ipynb +1492 -0
  890. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Built-in functions using SQLAlchemy.ipynb +536 -0
  891. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Regular Expressions Using SQLAlchemy.ipynb +570 -0
  892. teradataml/data/notebooks/sqlalchemy/Teradata Vantage String Functions Using SQLAlchemy.ipynb +2559 -0
  893. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Window Aggregate Functions using SQLAlchemy.ipynb +2911 -0
  894. teradataml/data/notebooks/sqlalchemy/Using Generic SQLAlchemy ClauseElements teradataml DataFrame assign method.ipynb +698 -0
  895. teradataml/data/notebooks/sqlalchemy/__init__.py +0 -0
  896. teradataml/data/notebooks/sqlalchemy/teradataml filtering using SQLAlchemy ClauseElements.ipynb +784 -0
  897. teradataml/data/npath_example.json +23 -0
  898. teradataml/data/ntree_example.json +14 -0
  899. teradataml/data/numeric_strings.csv +5 -0
  900. teradataml/data/numerics.csv +4 -0
  901. teradataml/data/ocean_buoy.csv +17 -0
  902. teradataml/data/ocean_buoy2.csv +17 -0
  903. teradataml/data/ocean_buoys.csv +28 -0
  904. teradataml/data/ocean_buoys2.csv +10 -0
  905. teradataml/data/ocean_buoys_nonpti.csv +28 -0
  906. teradataml/data/ocean_buoys_seq.csv +29 -0
  907. teradataml/data/onehot_encoder_train.csv +4 -0
  908. teradataml/data/openml_example.json +92 -0
  909. teradataml/data/optional_event_table.csv +4 -0
  910. teradataml/data/orders1.csv +11 -0
  911. teradataml/data/orders1_12.csv +13 -0
  912. teradataml/data/orders_ex.csv +4 -0
  913. teradataml/data/pack_example.json +9 -0
  914. teradataml/data/package_tracking.csv +19 -0
  915. teradataml/data/package_tracking_pti.csv +19 -0
  916. teradataml/data/pagerank_example.json +13 -0
  917. teradataml/data/paragraphs_input.csv +6 -0
  918. teradataml/data/pathanalyzer_example.json +8 -0
  919. teradataml/data/pathgenerator_example.json +8 -0
  920. teradataml/data/patient_profile.csv +101 -0
  921. teradataml/data/pattern_matching_data.csv +11 -0
  922. teradataml/data/payment_fraud_dataset.csv +10001 -0
  923. teradataml/data/peppers.png +0 -0
  924. teradataml/data/phrases.csv +7 -0
  925. teradataml/data/pivot_example.json +9 -0
  926. teradataml/data/pivot_input.csv +22 -0
  927. teradataml/data/playerRating.csv +31 -0
  928. teradataml/data/pos_input.csv +40 -0
  929. teradataml/data/postagger_example.json +7 -0
  930. teradataml/data/posttagger_output.csv +44 -0
  931. teradataml/data/production_data.csv +17 -0
  932. teradataml/data/production_data2.csv +7 -0
  933. teradataml/data/randomsample_example.json +32 -0
  934. teradataml/data/randomwalksample_example.json +9 -0
  935. teradataml/data/rank_table.csv +6 -0
  936. teradataml/data/real_values.csv +14 -0
  937. teradataml/data/ref_mobile_data.csv +4 -0
  938. teradataml/data/ref_mobile_data_dense.csv +2 -0
  939. teradataml/data/ref_url.csv +17 -0
  940. teradataml/data/restaurant_reviews.csv +7 -0
  941. teradataml/data/retail_churn_table.csv +27772 -0
  942. teradataml/data/river_data.csv +145 -0
  943. teradataml/data/roc_example.json +8 -0
  944. teradataml/data/roc_input.csv +101 -0
  945. teradataml/data/rule_inputs.csv +6 -0
  946. teradataml/data/rule_table.csv +2 -0
  947. teradataml/data/sales.csv +7 -0
  948. teradataml/data/sales_transaction.csv +501 -0
  949. teradataml/data/salesdata.csv +342 -0
  950. teradataml/data/sample_cities.csv +3 -0
  951. teradataml/data/sample_shapes.csv +11 -0
  952. teradataml/data/sample_streets.csv +3 -0
  953. teradataml/data/sampling_example.json +16 -0
  954. teradataml/data/sax_example.json +17 -0
  955. teradataml/data/scale_attributes.csv +3 -0
  956. teradataml/data/scale_example.json +74 -0
  957. teradataml/data/scale_housing.csv +11 -0
  958. teradataml/data/scale_housing_test.csv +6 -0
  959. teradataml/data/scale_input_part_sparse.csv +31 -0
  960. teradataml/data/scale_input_partitioned.csv +16 -0
  961. teradataml/data/scale_input_sparse.csv +11 -0
  962. teradataml/data/scale_parameters.csv +3 -0
  963. teradataml/data/scale_stat.csv +11 -0
  964. teradataml/data/scalebypartition_example.json +13 -0
  965. teradataml/data/scalemap_example.json +13 -0
  966. teradataml/data/scalesummary_example.json +12 -0
  967. teradataml/data/score_category.csv +101 -0
  968. teradataml/data/score_summary.csv +4 -0
  969. teradataml/data/script_example.json +10 -0
  970. teradataml/data/scripts/deploy_script.py +84 -0
  971. teradataml/data/scripts/lightgbm/dataset.template +175 -0
  972. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +264 -0
  973. teradataml/data/scripts/lightgbm/lightgbm_function.template +234 -0
  974. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +177 -0
  975. teradataml/data/scripts/mapper.R +20 -0
  976. teradataml/data/scripts/mapper.py +16 -0
  977. teradataml/data/scripts/mapper_replace.py +16 -0
  978. teradataml/data/scripts/sklearn/__init__.py +0 -0
  979. teradataml/data/scripts/sklearn/sklearn_fit.py +205 -0
  980. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +148 -0
  981. teradataml/data/scripts/sklearn/sklearn_function.template +144 -0
  982. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +166 -0
  983. teradataml/data/scripts/sklearn/sklearn_neighbors.py +161 -0
  984. teradataml/data/scripts/sklearn/sklearn_score.py +145 -0
  985. teradataml/data/scripts/sklearn/sklearn_transform.py +327 -0
  986. teradataml/data/sdk/modelops/modelops_spec.json +101737 -0
  987. teradataml/data/seeds.csv +10 -0
  988. teradataml/data/sentenceextractor_example.json +7 -0
  989. teradataml/data/sentiment_extract_input.csv +11 -0
  990. teradataml/data/sentiment_train.csv +16 -0
  991. teradataml/data/sentiment_word.csv +20 -0
  992. teradataml/data/sentiment_word_input.csv +20 -0
  993. teradataml/data/sentimentextractor_example.json +24 -0
  994. teradataml/data/sentimenttrainer_example.json +8 -0
  995. teradataml/data/sequence_table.csv +10 -0
  996. teradataml/data/seriessplitter_example.json +8 -0
  997. teradataml/data/sessionize_example.json +17 -0
  998. teradataml/data/sessionize_table.csv +116 -0
  999. teradataml/data/setop_test1.csv +24 -0
  1000. teradataml/data/setop_test2.csv +22 -0
  1001. teradataml/data/soc_nw_edges.csv +11 -0
  1002. teradataml/data/soc_nw_vertices.csv +8 -0
  1003. teradataml/data/souvenir_timeseries.csv +168 -0
  1004. teradataml/data/sparse_iris_attribute.csv +5 -0
  1005. teradataml/data/sparse_iris_test.csv +121 -0
  1006. teradataml/data/sparse_iris_train.csv +601 -0
  1007. teradataml/data/star1.csv +6 -0
  1008. teradataml/data/star_pivot.csv +8 -0
  1009. teradataml/data/state_transition.csv +5 -0
  1010. teradataml/data/stock_data.csv +53 -0
  1011. teradataml/data/stock_movement.csv +11 -0
  1012. teradataml/data/stock_vol.csv +76 -0
  1013. teradataml/data/stop_words.csv +8 -0
  1014. teradataml/data/store_sales.csv +37 -0
  1015. teradataml/data/stringsimilarity_example.json +8 -0
  1016. teradataml/data/strsimilarity_input.csv +13 -0
  1017. teradataml/data/students.csv +101 -0
  1018. teradataml/data/svm_iris_input_test.csv +121 -0
  1019. teradataml/data/svm_iris_input_train.csv +481 -0
  1020. teradataml/data/svm_iris_model.csv +7 -0
  1021. teradataml/data/svmdense_example.json +10 -0
  1022. teradataml/data/svmdensepredict_example.json +19 -0
  1023. teradataml/data/svmsparse_example.json +8 -0
  1024. teradataml/data/svmsparsepredict_example.json +14 -0
  1025. teradataml/data/svmsparsesummary_example.json +8 -0
  1026. teradataml/data/target_mobile_data.csv +13 -0
  1027. teradataml/data/target_mobile_data_dense.csv +5 -0
  1028. teradataml/data/target_udt_data.csv +8 -0
  1029. teradataml/data/tdnerextractor_example.json +14 -0
  1030. teradataml/data/templatedata.csv +1201 -0
  1031. teradataml/data/templates/open_source_ml.json +11 -0
  1032. teradataml/data/teradata_icon.ico +0 -0
  1033. teradataml/data/teradataml_example.json +1473 -0
  1034. teradataml/data/test_classification.csv +101 -0
  1035. teradataml/data/test_loan_prediction.csv +53 -0
  1036. teradataml/data/test_pacf_12.csv +37 -0
  1037. teradataml/data/test_prediction.csv +101 -0
  1038. teradataml/data/test_regression.csv +101 -0
  1039. teradataml/data/test_river2.csv +109 -0
  1040. teradataml/data/text_inputs.csv +6 -0
  1041. teradataml/data/textchunker_example.json +8 -0
  1042. teradataml/data/textclassifier_example.json +7 -0
  1043. teradataml/data/textclassifier_input.csv +7 -0
  1044. teradataml/data/textclassifiertrainer_example.json +7 -0
  1045. teradataml/data/textmorph_example.json +11 -0
  1046. teradataml/data/textparser_example.json +15 -0
  1047. teradataml/data/texttagger_example.json +12 -0
  1048. teradataml/data/texttokenizer_example.json +7 -0
  1049. teradataml/data/texttrainer_input.csv +11 -0
  1050. teradataml/data/tf_example.json +7 -0
  1051. teradataml/data/tfidf_example.json +14 -0
  1052. teradataml/data/tfidf_input1.csv +201 -0
  1053. teradataml/data/tfidf_train.csv +6 -0
  1054. teradataml/data/time_table1.csv +535 -0
  1055. teradataml/data/time_table2.csv +14 -0
  1056. teradataml/data/timeseriesdata.csv +1601 -0
  1057. teradataml/data/timeseriesdatasetsd4.csv +105 -0
  1058. teradataml/data/timestamp_data.csv +4 -0
  1059. teradataml/data/titanic.csv +892 -0
  1060. teradataml/data/titanic_dataset_unpivoted.csv +19 -0
  1061. teradataml/data/to_num_data.csv +4 -0
  1062. teradataml/data/tochar_data.csv +5 -0
  1063. teradataml/data/token_table.csv +696 -0
  1064. teradataml/data/train_multiclass.csv +101 -0
  1065. teradataml/data/train_regression.csv +101 -0
  1066. teradataml/data/train_regression_multiple_labels.csv +101 -0
  1067. teradataml/data/train_tracking.csv +28 -0
  1068. teradataml/data/trans_dense.csv +16 -0
  1069. teradataml/data/trans_sparse.csv +55 -0
  1070. teradataml/data/transformation_table.csv +6 -0
  1071. teradataml/data/transformation_table_new.csv +2 -0
  1072. teradataml/data/tv_spots.csv +16 -0
  1073. teradataml/data/twod_climate_data.csv +117 -0
  1074. teradataml/data/uaf_example.json +529 -0
  1075. teradataml/data/univariatestatistics_example.json +9 -0
  1076. teradataml/data/unpack_example.json +10 -0
  1077. teradataml/data/unpivot_example.json +25 -0
  1078. teradataml/data/unpivot_input.csv +8 -0
  1079. teradataml/data/url_data.csv +10 -0
  1080. teradataml/data/us_air_pass.csv +37 -0
  1081. teradataml/data/us_population.csv +624 -0
  1082. teradataml/data/us_states_shapes.csv +52 -0
  1083. teradataml/data/varmax_example.json +18 -0
  1084. teradataml/data/vectordistance_example.json +30 -0
  1085. teradataml/data/ville_climatedata.csv +121 -0
  1086. teradataml/data/ville_tempdata.csv +12 -0
  1087. teradataml/data/ville_tempdata1.csv +12 -0
  1088. teradataml/data/ville_temperature.csv +11 -0
  1089. teradataml/data/waveletTable.csv +1605 -0
  1090. teradataml/data/waveletTable2.csv +1605 -0
  1091. teradataml/data/weightedmovavg_example.json +9 -0
  1092. teradataml/data/wft_testing.csv +5 -0
  1093. teradataml/data/windowdfft.csv +16 -0
  1094. teradataml/data/wine_data.csv +1600 -0
  1095. teradataml/data/word_embed_input_table1.csv +6 -0
  1096. teradataml/data/word_embed_input_table2.csv +5 -0
  1097. teradataml/data/word_embed_model.csv +23 -0
  1098. teradataml/data/words_input.csv +13 -0
  1099. teradataml/data/xconvolve_complex_left.csv +6 -0
  1100. teradataml/data/xconvolve_complex_leftmulti.csv +6 -0
  1101. teradataml/data/xgboost_example.json +36 -0
  1102. teradataml/data/xgboostpredict_example.json +32 -0
  1103. teradataml/data/ztest_example.json +16 -0
  1104. teradataml/dataframe/__init__.py +0 -0
  1105. teradataml/dataframe/copy_to.py +2446 -0
  1106. teradataml/dataframe/data_transfer.py +2840 -0
  1107. teradataml/dataframe/dataframe.py +20908 -0
  1108. teradataml/dataframe/dataframe_utils.py +2114 -0
  1109. teradataml/dataframe/fastload.py +794 -0
  1110. teradataml/dataframe/functions.py +2110 -0
  1111. teradataml/dataframe/indexer.py +424 -0
  1112. teradataml/dataframe/row.py +160 -0
  1113. teradataml/dataframe/setop.py +1171 -0
  1114. teradataml/dataframe/sql.py +10904 -0
  1115. teradataml/dataframe/sql_function_parameters.py +440 -0
  1116. teradataml/dataframe/sql_functions.py +652 -0
  1117. teradataml/dataframe/sql_interfaces.py +220 -0
  1118. teradataml/dataframe/vantage_function_types.py +675 -0
  1119. teradataml/dataframe/window.py +694 -0
  1120. teradataml/dbutils/__init__.py +3 -0
  1121. teradataml/dbutils/dbutils.py +2871 -0
  1122. teradataml/dbutils/filemgr.py +318 -0
  1123. teradataml/gen_ai/__init__.py +2 -0
  1124. teradataml/gen_ai/convAI.py +473 -0
  1125. teradataml/geospatial/__init__.py +4 -0
  1126. teradataml/geospatial/geodataframe.py +1105 -0
  1127. teradataml/geospatial/geodataframecolumn.py +392 -0
  1128. teradataml/geospatial/geometry_types.py +926 -0
  1129. teradataml/hyperparameter_tuner/__init__.py +1 -0
  1130. teradataml/hyperparameter_tuner/optimizer.py +4115 -0
  1131. teradataml/hyperparameter_tuner/utils.py +303 -0
  1132. teradataml/lib/__init__.py +0 -0
  1133. teradataml/lib/aed_0_1.dll +0 -0
  1134. teradataml/lib/libaed_0_1.dylib +0 -0
  1135. teradataml/lib/libaed_0_1.so +0 -0
  1136. teradataml/lib/libaed_0_1_aarch64.so +0 -0
  1137. teradataml/lib/libaed_0_1_ppc64le.so +0 -0
  1138. teradataml/opensource/__init__.py +1 -0
  1139. teradataml/opensource/_base.py +1321 -0
  1140. teradataml/opensource/_class.py +464 -0
  1141. teradataml/opensource/_constants.py +61 -0
  1142. teradataml/opensource/_lightgbm.py +949 -0
  1143. teradataml/opensource/_sklearn.py +1008 -0
  1144. teradataml/opensource/_wrapper_utils.py +267 -0
  1145. teradataml/options/__init__.py +148 -0
  1146. teradataml/options/configure.py +489 -0
  1147. teradataml/options/display.py +187 -0
  1148. teradataml/plot/__init__.py +3 -0
  1149. teradataml/plot/axis.py +1427 -0
  1150. teradataml/plot/constants.py +15 -0
  1151. teradataml/plot/figure.py +431 -0
  1152. teradataml/plot/plot.py +810 -0
  1153. teradataml/plot/query_generator.py +83 -0
  1154. teradataml/plot/subplot.py +216 -0
  1155. teradataml/scriptmgmt/UserEnv.py +4273 -0
  1156. teradataml/scriptmgmt/__init__.py +3 -0
  1157. teradataml/scriptmgmt/lls_utils.py +2157 -0
  1158. teradataml/sdk/README.md +79 -0
  1159. teradataml/sdk/__init__.py +4 -0
  1160. teradataml/sdk/_auth_modes.py +422 -0
  1161. teradataml/sdk/_func_params.py +487 -0
  1162. teradataml/sdk/_json_parser.py +453 -0
  1163. teradataml/sdk/_openapi_spec_constants.py +249 -0
  1164. teradataml/sdk/_utils.py +236 -0
  1165. teradataml/sdk/api_client.py +900 -0
  1166. teradataml/sdk/constants.py +62 -0
  1167. teradataml/sdk/modelops/__init__.py +98 -0
  1168. teradataml/sdk/modelops/_client.py +409 -0
  1169. teradataml/sdk/modelops/_constants.py +304 -0
  1170. teradataml/sdk/modelops/models.py +2308 -0
  1171. teradataml/sdk/spinner.py +107 -0
  1172. teradataml/series/__init__.py +0 -0
  1173. teradataml/series/series.py +537 -0
  1174. teradataml/series/series_utils.py +71 -0
  1175. teradataml/store/__init__.py +12 -0
  1176. teradataml/store/feature_store/__init__.py +0 -0
  1177. teradataml/store/feature_store/constants.py +658 -0
  1178. teradataml/store/feature_store/feature_store.py +4814 -0
  1179. teradataml/store/feature_store/mind_map.py +639 -0
  1180. teradataml/store/feature_store/models.py +7330 -0
  1181. teradataml/store/feature_store/utils.py +390 -0
  1182. teradataml/table_operators/Apply.py +979 -0
  1183. teradataml/table_operators/Script.py +1739 -0
  1184. teradataml/table_operators/TableOperator.py +1343 -0
  1185. teradataml/table_operators/__init__.py +2 -0
  1186. teradataml/table_operators/apply_query_generator.py +262 -0
  1187. teradataml/table_operators/query_generator.py +493 -0
  1188. teradataml/table_operators/table_operator_query_generator.py +462 -0
  1189. teradataml/table_operators/table_operator_util.py +726 -0
  1190. teradataml/table_operators/templates/dataframe_apply.template +184 -0
  1191. teradataml/table_operators/templates/dataframe_map.template +176 -0
  1192. teradataml/table_operators/templates/dataframe_register.template +73 -0
  1193. teradataml/table_operators/templates/dataframe_udf.template +67 -0
  1194. teradataml/table_operators/templates/script_executor.template +170 -0
  1195. teradataml/telemetry_utils/__init__.py +0 -0
  1196. teradataml/telemetry_utils/queryband.py +53 -0
  1197. teradataml/utils/__init__.py +0 -0
  1198. teradataml/utils/docstring.py +527 -0
  1199. teradataml/utils/dtypes.py +943 -0
  1200. teradataml/utils/internal_buffer.py +122 -0
  1201. teradataml/utils/print_versions.py +206 -0
  1202. teradataml/utils/utils.py +451 -0
  1203. teradataml/utils/validators.py +3305 -0
  1204. teradataml-20.0.0.8.dist-info/METADATA +2804 -0
  1205. teradataml-20.0.0.8.dist-info/RECORD +1208 -0
  1206. teradataml-20.0.0.8.dist-info/WHEEL +5 -0
  1207. teradataml-20.0.0.8.dist-info/top_level.txt +1 -0
  1208. teradataml-20.0.0.8.dist-info/zip-safe +1 -0
@@ -0,0 +1,2996 @@
1
+ import re
2
+ from datetime import date
3
+ from teradataml.common.exceptions import TeradataMlException
4
+ from teradataml.common.messages import Messages, MessageCodes
5
+ from teradataml.common.utils import UtilFuncs
6
+ from teradataml.dataframe.dataframe import DataFrame
7
+ from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils
8
+ from teradataml.utils.validators import _Validators
9
+ from teradataml.utils.dtypes import _SuppArgTypes, _DtypesMappers
10
+ from teradatasqlalchemy.types import BIGINT, BYTEINT, CHAR, DATE, DECIMAL, FLOAT, INTEGER, NUMBER, SMALLINT, TIME, \
11
+ TIMESTAMP, VARCHAR
12
+
13
+
14
+ class _Transformations(object):
15
+ """ Class to represent different transformation techniques. """
16
+
17
+ def __init__(self, columns=None, out_columns=None, datatype=None,
18
+ columns_optional=True):
19
+ """
20
+ DESCRIPTION:
21
+ Constructor for _Transformations.
22
+ Note:
23
+ It is intended to be used as a super class for transformation techniques.
24
+
25
+ PARAMETERS:
26
+ columns:
27
+ Optional Argument.
28
+ Required when "out_columns" is used or "columns_optional" is False.
29
+ Specifies the names of the columns.
30
+ Types: str or list of str
31
+
32
+ out_columns:
33
+ Optional Argument.
34
+ Specifies the names of the output columns.
35
+ Notes:
36
+ 1. "columns" argument must be used, when this argument is used.
37
+ 2. Number of elements in "columns" and "out_columns" must be same.
38
+ Types: str or list of str
39
+
40
+ datatype:
41
+ Optional Argument.
42
+ Specifies the name of the intended datatype of the output column.
43
+ Intended data types for the output column can be specified using either the
44
+ teradatasqlalchemy types or the permitted strings mentioned below:
45
+ -------------------------------------------------------------------
46
+ | If intended SQL Data Type is | Permitted Value to be passed is |
47
+ |-------------------------------------------------------------------|
48
+ | bigint | bigint |
49
+ | byteint | byteint |
50
+ | char(n) | char,n |
51
+ | date | date |
52
+ | decimal(m,n) | decimal,m,n |
53
+ | float | float |
54
+ | integer | integer |
55
+ | number(*) | number |
56
+ | number(n) | number,n |
57
+ | number(*,n) | number,*,n |
58
+ | number(n,n) | number,n,n |
59
+ | smallint | smallint |
60
+ | time(p) | time,p |
61
+ | timestamp(p) | timestamp,p |
62
+ | varchar(n) | varchar,n |
63
+ --------------------------------------------------------------------
64
+ Notes:
65
+ 1. Argument is ignored if "columns" argument is not used.
66
+ 2. char without a size is not supported.
67
+ 3. number(*) does not include the * in its datatype format.
68
+ Examples:
69
+ 1. If intended datatype for the output column is "bigint", then
70
+ pass string "bigint" to the argument as shown below:
71
+ datatype="bigint"
72
+ 2. If intended datatype for the output column is "decimal(3,5)", then
73
+ pass string "decimal,3,5" to the argument as shown below:
74
+ datatype="decimal,3,5"
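+ 3. The intended datatype can also be passed as a teradatasqlalchemy
+ type object (a sketch, assuming the types imported at the top of
+ this module), as shown below:
+ datatype=DECIMAL(3, 5)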
75
+ Types: str, BIGINT, BYTEINT, CHAR, DATE, DECIMAL, FLOAT, INTEGER, NUMBER, SMALLINT, TIME,
76
+ TIMESTAMP, VARCHAR.
77
+
78
+ columns_optional:
79
+ Optional Argument.
80
+ Specifies whether to treat "columns" argument as required or optional.
81
+ Default Value: True ("columns" is optional)
82
+ Types: bool
83
+
84
+ RETURNS:
85
+ An instance of _Transformations class.
86
+
87
+ RAISES:
88
+ TeradataMlException, TypeError, ValueError
89
+
90
+ EXAMPLE:
91
+ _Transformations(columns="col1")
92
+ """
93
+ self.columns = columns
94
+ self.out_columns = out_columns
95
+ self.datatype = datatype
96
+
97
+ # Validations
98
+ arg_info_matrix = []
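+ # Each entry below follows the _Validators convention (inferred from
+ # usage in this module): [argument name, argument value, whether the
+ # argument is optional, permitted types, empty-value check(, permitted values)].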
99
+ arg_info_matrix.append(
100
+ ["columns", self.columns, columns_optional, (str, list), True])
101
+ arg_info_matrix.append(["out_columns", self.out_columns, True, (str, list), True])
102
+ arg_info_matrix.append(["datatype", self.datatype, True, _SuppArgTypes.VAL_ARG_DATATYPE, True])
103
+
104
+ # Validate for missing required arguments.
105
+ _Validators._validate_missing_required_arguments(arg_info_matrix)
106
+
107
+ # Argument validations.
108
+ _Validators._validate_function_arguments(arg_info_matrix)
109
+
110
+ if self.out_columns is not None:
111
+ # Raise error, if "columns" not provided and "out_columns" is provided.
112
+ _Validators._validate_dependent_argument("out_columns", self.out_columns, "columns", self.columns)
113
+
114
+ if len(UtilFuncs._as_list(self.out_columns)) != len(
115
+ UtilFuncs._as_list(self.columns)):
116
+ # Raise an error if the number of input and output columns is not the same.
117
+ err_ = Messages.get_message(MessageCodes.INVALID_LENGTH_ARGS,
118
+ "columns and out_columns")
119
+ raise TeradataMlException(err_, MessageCodes.INVALID_LENGTH_ARGS)
120
+
121
+ def _val_transformation_fmt(self):
122
+ """
123
+ DESCRIPTION:
124
+ Internal function to return a string representation of basic Transformation
125
+ technique arguments "columns", "out_columns" and "datatype" as per SQL syntax
126
+ of the function.
127
+ The function returns an empty string if the "columns" argument is None.
128
+
129
+ PARAMETERS:
130
+ None.
131
+
132
+ RETURNS:
133
+ String representing SQL syntax for arguments "columns", "out_columns"
134
+ and "datatype".
135
+
136
+ RAISES:
137
+ None.
138
+
139
+ EXAMPLE:
140
+ self._val_transformation_fmt()
141
+ """
142
+ ret_value = ""
143
+ if self.columns is not None:
144
+ self.columns = UtilFuncs._as_list(self.columns)
145
+ columns_fmt = "columns({})"
146
+
147
+ columns_arg_values = self.columns
148
+ if self.out_columns:
149
+ self.out_columns = UtilFuncs._as_list(self.out_columns)
150
+ columns_arg_values = []
151
+ for col, out_col in dict(zip(self.columns, self.out_columns)).items():
152
+ columns_arg_values.append("{}/{}".format(col, out_col))
153
+
154
+ ret_value = columns_fmt.format(", ".join(columns_arg_values))
155
+
156
+ if self.datatype:
157
+ if not isinstance(self.datatype, str):
158
+ self.datatype = _DtypesMappers.TDSQLALCHEMY_DATATYPE_TO_VAL_STRING_MAPPER[type(self.datatype)]\
159
+ (self.datatype)
160
+ ret_value = "{}, datatype({})".format(ret_value, self.datatype)
161
+ return ret_value
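+
+ # Illustration (a sketch based on the logic above): for an instance
+ # created as _Transformations(columns=["Jan", "Feb"],
+ # out_columns=["january", "february"], datatype="integer"),
+ # this method returns:
+ # "columns(Jan/january, Feb/february), datatype(integer)"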
162
+
163
+
164
+ class FillNa(_Transformations):
165
+ """ Class to represent null replacement transformation technique. """
166
+
167
+ def __init__(self, style="mean", value=None, columns=None, out_columns=None,
168
+ datatype=None):
169
+ """
170
+ DESCRIPTION:
171
+ FillNa allows the user to perform missing value/null replacement transformations.
172
+ Note:
173
+ Output of this function is passed to "fillna" argument of "Transform"
174
+ function from Vantage Analytic Library.
175
+
176
+ PARAMETERS:
177
+ style:
178
+ Optional Argument.
179
+ Specifies the nullstyle for missing value/null value replacement.
180
+ A literal value, the mean, median, mode, or an imputed value joined
181
+ from another table can be used as the replacement value. The median
182
+ value can be requested with or without averaging of two middle values
183
+ when there is an even number of values.
184
+ Literal value replacement is supported for numeric, character, and
185
+ date data types.
186
+ Mean value replacement is supported for columns of numeric type or
187
+ date type.
188
+ Median without averaging, mode, and imputed value replacement are
189
+ valid for any supported type. Median with averaging is supported
190
+ only for numeric and date type.
191
+ Permitted Values: 'literal', 'mean', 'median', 'mode', 'median_wo_mean',
192
+ 'imputed'
193
+ Default Value: 'mean'
194
+ Types: str
195
+
196
+ value:
197
+ Optional Argument. Required when "style" is 'literal' or 'imputed'.
198
+ Specifies the value to be used for null replacement transformations.
199
+ Notes:
200
+ 1. When "style" is 'imputed', value must be of type teradataml
201
+ DataFrame.
202
+ 2. When "style" is 'literal', value can be of any type.
203
+ 3. If date values are entered as a string, the keyword 'DATE' must
204
+ precede the date value, and the value must not be enclosed in
205
+ single quotes. Alternatively, pass a datetime.date object.
206
+ For example,
207
+ value='DATE 1987-06-09'
208
+ value=date(1987, 6, 9)
209
+ Types: teradataml DataFrame, bool, int, str, float, datetime.date
210
+
211
+ columns:
212
+ Optional Argument.
213
+ Specifies the names of the columns.
214
+ Types: str or list of str
215
+
216
+ out_columns:
217
+ Optional Argument.
218
+ Specifies the names of the output columns.
219
+ Notes:
220
+ Number of elements in "columns" and "out_columns" must be the same.
221
+ Types: str or list of str
222
+
223
+ datatype:
224
+ Optional Argument.
225
+ Specifies the name of the intended datatype of the output column.
226
+ Intended data types for the output column can be specified using either the
227
+ teradatasqlalchemy types or the permitted strings mentioned below:
228
+ -------------------------------------------------------------------
229
+ | If intended SQL Data Type is | Permitted Value to be passed is |
230
+ |-------------------------------------------------------------------|
231
+ | bigint | bigint |
232
+ | byteint | byteint |
233
+ | char(n) | char,n |
234
+ | date | date |
235
+ | decimal(m,n) | decimal,m,n |
236
+ | float | float |
237
+ | integer | integer |
238
+ | number(*) | number |
239
+ | number(n) | number,n |
240
+ | number(*,n) | number,*,n |
241
+ | number(n,n) | number,n,n |
242
+ | smallint | smallint |
243
+ | time(p) | time,p |
244
+ | timestamp(p) | timestamp,p |
245
+ | varchar(n) | varchar,n |
246
+ --------------------------------------------------------------------
247
+ Notes:
248
+ 1. char without a size is not supported.
249
+ 2. number(*) does not include the * in its datatype format.
250
+ Examples:
251
+ 1. If intended datatype for the output column is "bigint", then
252
+ pass string "bigint" to the argument as shown below:
253
+ datatype="bigint"
254
+ 2. If intended datatype for the output column is "decimal(3,5)", then
255
+ pass string "decimal,3,5" to the argument as shown below:
256
+ datatype="decimal,3,5"
257
+ Types: str, BIGINT, BYTEINT, CHAR, DATE, DECIMAL, FLOAT, INTEGER, NUMBER, SMALLINT, TIME,
258
+ TIMESTAMP, VARCHAR.
259
+
260
+ RETURNS:
261
+ An instance of FillNa class.
262
+
263
+ RAISES:
264
+ TeradataMlException, TypeError, ValueError
265
+
266
+ EXAMPLE:
267
+ # Note:
268
+ # To run any transformation, the user needs to use the Transform() function
269
+ # from the Vantage Analytic Library.
270
+ # To do so, import valib first and set the "val_install_location".
271
+ >>> from teradataml import configure, DataFrame, FillNa, load_example_data, valib
272
+ >>> configure.val_install_location = "SYSLIB"
273
+ >>>
274
+
275
+ # Load example data.
276
+ >>> load_example_data("dataframe", ["sales", "employee_info"])
277
+ >>>
278
+
279
+ # Create the required DataFrames.
280
+ >>> sales = DataFrame("sales")
281
+ >>> sales
282
+ Feb Jan Mar Apr datetime
283
+ accounts
284
+ Alpha Co 210.0 200.0 215.0 250.0 04/01/2017
285
+ Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
286
+ Yellow Inc 90.0 NaN NaN NaN 04/01/2017
287
+ Jones LLC 200.0 150.0 140.0 180.0 04/01/2017
288
+ Red Inc 200.0 150.0 140.0 NaN 04/01/2017
289
+ Orange Inc 210.0 NaN NaN 250.0 04/01/2017
290
+ >>>
291
+
292
+ # Example 1: Replace missing values in columns 'Jan' and 'Mar', with
293
+ # a literal value 0. Output columns are named as 'january' and
294
+ # 'march' respectively.
295
+ >>> fillna_literal = FillNa(style="literal", value=0, columns=["Jan", "Mar"],
296
+ ... out_columns=["january", "march"])
297
+ >>> obj = valib.Transform(data=sales, fillna=fillna_literal, key_columns="accounts")
298
+ >>> obj.result
299
+ accounts january march
300
+ 0 Blue Inc 50 95
301
+ 1 Orange Inc 0 0
302
+ 2 Red Inc 150 140
303
+ 3 Yellow Inc 0 0
304
+ 4 Jones LLC 150 140
305
+ 5 Alpha Co 200 215
306
+ >>>
307
+
308
+
309
+ # Example 2: Replace missing values in column 'Jan' with 'median' value from
310
+ # that column. Output column produced in the output is named as
311
+ # 'Jan2'.
312
+ >>> fillna_median = FillNa(style="median", columns="Jan", out_columns="Jan2")
313
+ >>> obj = valib.Transform(data=sales, fillna=fillna_median, key_columns="accounts")
314
+ >>> obj.result
315
+ accounts Jan2
316
+ 0 Alpha Co 200.000
317
+ 1 Red Inc 150.000
318
+ 2 Orange Inc 150.000
319
+ 3 Jones LLC 150.000
320
+ 4 Yellow Inc 150.000
321
+ 5 Blue Inc 50.000
322
+ >>>
323
+
324
+
325
+ # Example 3: Replace missing values in column 'Apr' with a median value
326
+ # without mean from that column.
327
+ >>> fillna_mwm = FillNa(style="median_wo_mean", columns="Apr")
328
+ >>> obj = valib.Transform(data=sales, fillna=fillna_mwm, key_columns="accounts")
329
+ >>> obj.result
330
+ accounts Apr
331
+ 0 Alpha Co 250
332
+ 1 Blue Inc 101
333
+ 2 Yellow Inc 180
334
+ 3 Jones LLC 180
335
+ 4 Red Inc 180
336
+ 5 Orange Inc 250
337
+ >>>
338
+
339
+
340
+ # Example 4: Replace missing values in column 'Apr' with 'mode' value from
341
+ # that column. Output column produced in the output is named as
342
+ # 'Apr2000'.
343
+ >>> fillna_mode = FillNa(style="mode", columns="Apr", out_columns="Apr2000")
344
+ >>> obj = valib.Transform(data=sales, fillna=fillna_mode, key_columns="accounts")
345
+ >>> obj.result
346
+ accounts Apr2000
347
+ 0 Blue Inc 101
348
+ 1 Orange Inc 250
349
+ 2 Red Inc 250
350
+ 3 Yellow Inc 250
351
+ 4 Jones LLC 180
352
+ 5 Alpha Co 250
353
+ >>>
354
+
355
+
356
+ # Example 5: Replace missing values in columns 'masters' and 'programming' using
357
+ # 'imputed' style.
358
+ >>> load_example_data("dataframe", ["admissions_train_nulls", "admissions_train"])
359
+
360
+ # Dataframe to be used for 'imputed' style replacement.
361
+ >>> admissions_train = DataFrame("admissions_train")
362
+ >>> admissions_train
363
+ masters gpa stats programming admitted
364
+ id
365
+ 22 yes 3.46 Novice Beginner 0
366
+ 26 yes 3.57 Advanced Advanced 1
367
+ 5 no 3.44 Novice Novice 0
368
+ 17 no 3.83 Advanced Advanced 1
369
+ 13 no 4.00 Advanced Novice 1
370
+ 19 yes 1.98 Advanced Advanced 0
371
+ 36 no 3.00 Advanced Novice 0
372
+ 15 yes 4.00 Advanced Advanced 1
373
+ 34 yes 3.85 Advanced Beginner 0
374
+ 38 yes 2.65 Advanced Beginner 1
375
+ >>>
376
+
377
+ # DataFrame containing NULL values in columns "programming" and "masters".
378
+ >>> admissions_train_nulls = DataFrame("admissions_train_nulls")
379
+ >>> admissions_train_nulls
380
+ masters gpa stats programming admitted
381
+ id
382
+ 5 no 3.44 Novice Novice 0
383
+ 7 yes 2.33 Novice Novice 1
384
+ 22 None 3.46 Novice None 0
385
+ 19 yes 1.98 Advanced Advanced 0
386
+ 15 None 4.00 Advanced Advanced 1
387
+ 17 None 3.83 Advanced Advanced 1
388
+ 34 None 3.85 Advanced Beginner 0
389
+ 13 no 4.00 Advanced Novice 1
390
+ 36 no 3.00 Advanced Novice 0
391
+ 40 yes 3.95 Novice Beginner 0
392
+ >>>
393
+
394
+ # Replace NULL values in the columns "masters" and "programming"
395
+ # in admissions_train_nulls dataframe with the values in the corresponding
396
+ # columns' values in admissions_train dataframe.
397
+ >>> fillna_imputed = FillNa(style="imputed",
398
+ ... columns=["masters", "programming"],
399
+ ... value=admissions_train)
400
+ >>> obj = valib.Transform(data=admissions_train_nulls,
401
+ ... fillna=fillna_imputed,
402
+ ... key_columns="id")
403
+ >>> obj.result
404
+ id masters programming
405
+ 0 22 yes Beginner
406
+ 1 36 no Novice
407
+ 2 15 yes Advanced
408
+ 3 38 yes Beginner
409
+ 4 5 no Novice
410
+ 5 17 no Advanced
411
+ 6 34 yes Beginner
412
+ 7 13 no Novice
413
+ 8 26 yes Advanced
414
+ 9 19 yes Advanced
415
+ >>>
416
+
417
+
418
+ # Example 6: This example shows how one can operate on date and character
419
+ # columns. The example also showcases using multiple missing value
420
+ # treatment techniques in a single call for variable
421
+ # transformation.
422
+ # Create the required DataFrames.
423
+ >>> einfo = DataFrame("employee_info")
424
+ >>> einfo
425
+ first_name marks dob joined_date
426
+ employee_no
427
+ 100 abcd None None None
428
+ 112 None None None 18/12/05
429
+ 101 abcde None None 02/12/05
430
+ >>>
431
+
432
+ # Using literal style for missing value treatment on a date type
433
+ # column "joined_date".
434
+ >>> fillna_1 = FillNa(style="literal", value="DATE 1995-12-23",
435
+ ... columns="joined_date", out_columns="date1")
436
+
437
+ # Using literal style for missing value treatment on a character type
438
+ # column "first_name". Repalce missing values with 'FNU', i.e.,
439
+ # First Name Unknown.
440
+ >>> fillna_2 = FillNa(style="literal", value="FNU", columns="first_name",
441
+ ... out_columns="char1")
442
+
443
+ # Using mean value for missing value treatment on a date type
444
+ # column "joined_date".
445
+ >>> fillna_3 = FillNa(style="mean", columns="joined_date",
446
+ ... out_columns="date2")
447
+
448
+ # Using median value for missing value treatment on a date type
449
+ # column "joined_date".
450
+ >>> fillna_4 = FillNa(style="median", columns="joined_date",
451
+ ... out_columns="date2A")
452
+
453
+ # Using median value without mean for missing value treatment on
454
+ # a date type column "joined_date".
455
+ >>> fillna_5 = FillNa(style="median_wo_mean", columns="joined_date",
456
+ ... out_columns="date3")
457
+
458
+ # Using mode value for missing value treatment on a date type
459
+ # column "joined_date".
460
+ >>> fillna_6 = FillNa(style="mode", columns="joined_date",
461
+ ... out_columns="date4")
462
+
463
+ # Using median value without mean for missing value treatment on
464
+ # a character type column "first_name".
465
+ >>> fillna_7 = FillNa(style="median_wo_mean", columns="first_name",
466
+ ... out_columns="char2")
467
+
468
+ # Using mode value for missing value treatment on a character type
469
+ # column "first_name".
470
+ >>> fillna_8 = FillNa(style="mode", columns="first_name",
471
+ ... out_columns="char3")
472
+
473
+ # Perform the missing value transformations using Transform() function
474
+ # from Vantage Analytic Library.
475
+ >>> obj = valib.Transform(data=einfo,
476
+ ... fillna=[fillna_1, fillna_2, fillna_3, fillna_4,
477
+ ... fillna_5, fillna_6, fillna_7, fillna_8],
478
+ ... key_columns="employee_no")
479
+ >>> obj.result
480
+ employee_no date1 char1 date2 date2A date3 date4 char2 char3
481
+ 0 112 18/12/05 FNU 18/12/05 18/12/05 18/12/05 18/12/05 abcd abcd
482
+ 1 101 02/12/05 abcde 02/12/05 02/12/05 02/12/05 02/12/05 abcde abcde
483
+ 2 100 95/12/23 abcd 60/12/04 60/12/04 02/12/05 02/12/05 abcd abcd
484
+ >>>
485
+ """
486
+ # Call super()
487
+ super().__init__(columns=columns, out_columns=out_columns, datatype=datatype, columns_optional=True)
488
+ # Initialize style and value as data members.
489
+ self.style = style
490
+ self.value = value
491
+
492
+ # Validations
493
+ arg_info_matrix = []
494
+ permitted_styles = ["LITERAL", "MEAN", "MEDIAN", "MEDIAN_WO_MEAN", "MODE",
495
+ "IMPUTED"]
496
+ arg_info_matrix.append(["style", self.style, True, str, True, permitted_styles])
497
+ arg_info_matrix.append(["value", self.value, True, (DataFrame, bool, int, float, str, date)])
498
+
499
+ # Note:
500
+ # Validations for "columns", "out_columns" and "datatype" are done by super().
501
+ # Other argument validations.
502
+ _Validators._validate_function_arguments(arg_info_matrix)
503
+
504
+ # If a date object is passed to the "value" argument,
505
+ # convert it to string format, for example 'DATE 1978-06-09'.
506
+ if isinstance(self.value, date):
507
+ self.value = UtilFuncs._convert_date_to_string(self.value)
508
+
509
+ # If style is 'MEDIAN_WO_MEAN', in SQL we will use 'medianwithoutaveraging'.
510
+ if self.style.upper() == "MEDIAN_WO_MEAN":
511
+ self.style = "medianwithoutaveraging"
512
+
513
+ # "value" should be provided when "style" is 'literal' or 'imputed'.
514
+ # "value" is ignored when style is other than 'literal' or 'imputed'.
515
+ if self.style.upper() in ["LITERAL", "IMPUTED"] and value is None:
516
+ err_ = Messages.get_message(MessageCodes.DEPENDENT_ARG_MISSING, "value",
517
+ "style={}".format(self.style))
518
+ raise TeradataMlException(err_, MessageCodes.DEPENDENT_ARG_MISSING)
519
+
520
+ if self.style.upper() == "IMPUTED":
521
+ if not isinstance(value, DataFrame):
522
+ err_ = Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE, "value",
523
+ "teradataml DataFrame")
524
+ raise TypeError(err_)
525
+ else:
526
+ if value._table_name is None:
527
+ value._table_name = df_utils._execute_node_return_db_object_name(
528
+ value._nodeid, value._metaexpr)
529
+ self.value = UtilFuncs._extract_table_name(value._table_name).replace(
530
+ "\"", "")
531
+
532
+ # Add double single quotes when "value" contains any of the special val symbols.
533
+ # Note:
534
+ # The special VAL symbols are: '{', '}', '(', ')', ',' and '/'.
535
+ if isinstance(self.value, str) and len(re.findall(r'[\{\}\(\),/]', self.value)) > 0:
536
+ self.value = """''{}''""".format(self.value)
537
+
538
+ def _val_nullstyle_fmt(self):
539
+ """
540
+ DESCRIPTION:
541
+ Internal function to return a string representation of nullstyle
542
+ Transformation technique.
543
+
544
+ PARAMETERS:
545
+ None.
546
+
547
+ RETURNS:
548
+ String representing nullstyle SQL syntax.
549
+
550
+ RAISES:
551
+ None.
552
+
553
+ EXAMPLE:
554
+ self._val_nullstyle_fmt()
555
+ """
556
+ nullstyle_fmt = "nullstyle({})"
557
+
558
+ nullstyle_args = self.style.lower()
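+ # For 'literal' and 'imputed' styles, the replacement value is appended
+ # to the nullstyle. An empty string literal is emitted as doubled
+ # quotes ('''') so that VAL receives an empty string value.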
559
+ if self.style.upper() in ["LITERAL", "IMPUTED"]:
560
+ nullstyle_args = "{}, {}".format(self.style.lower(), '\'\'\'\'' if isinstance(self.value, str) and len(
561
+ self.value) == 0 else self.value)
562
+ return nullstyle_fmt.format(nullstyle_args)
563
+
564
+ def _val_sql_syntax(self):
565
+ """
566
+ DESCRIPTION:
567
+ Internal function to return a string representation of null replacement
568
+ Transformation as required by SQL.
569
+
570
+ PARAMETERS:
571
+ None.
572
+
573
+ RETURNS:
574
+ String representing SQL syntax for 'nullreplacement' SQL argument.
575
+
576
+ RAISES:
577
+ None.
578
+
579
+ EXAMPLE:
580
+ self._val_sql_syntax()
581
+ """
582
+ ret_value = self._val_nullstyle_fmt()
583
+ columns_fmt = self._val_transformation_fmt()
584
+ if columns_fmt:
585
+ ret_value = "{}, {}".format(ret_value, columns_fmt)
586
+
587
+ return "{" + ret_value + "}"
588
+
589
+ class Binning(_Transformations):
590
+ """ Class to represent binning transformation technique. """
591
+
592
+ def __init__(self, columns, style="bins", value=10, lbound=None, ubound=None,
593
+ out_columns=None, datatype=None, fillna=None, **kwargs):
594
+ """
595
+ DESCRIPTION:
596
+ Binning allows the user to perform bin coding, replacing a continuous
597
+ numeric column with a categorical one to produce ordinal values (for example,
598
+ numeric categorical values where order is meaningful). Binning uses the
599
+ same techniques used in Histogram analysis, allowing you to choose between:
600
+ 1. equal-width bins,
601
+ 2. equal-width bins with a user-specified minimum and maximum range,
602
+ 3. bins with a user-specified width,
603
+ 4. evenly distributed bins, or
604
+ 5. bins with user-specified boundaries.
605
+
606
+ If the minimum and maximum are specified, all values less than the minimum
607
+ are put into bin 0, while all values greater than the maximum are put into
608
+ bin N+1. The same is true when the boundary option is specified.
609
+
610
+ Bin Coding supports numeric and date type columns. If date values are entered,
611
+ the keyword DATE must precede the date value, and the value must not be
612
+ enclosed in single quotes.
613
+
614
+ Note:
615
+ Output of this function is passed to "bins" argument of "Transform"
616
+ function from Vantage Analytic Library.
617
+
618
+ PARAMETERS:
619
+ columns:
620
+ Required Argument.
621
+ Specifies the names of the columns to perform transformation on.
622
+ Types: str or list of str
623
+
624
+ style:
625
+ Optional Argument.
626
+ Specifies the bin style to use.
627
+ Permitted Values:
628
+ * "bins":
629
+ This style allows the user to specify equal-width bins without any
630
+ boundaries. Argument "value" must be used when this style of
631
+ binning is used.
632
+ * "binswithboundaries":
633
+ This style allows the user to specify equal-width bins with a minimum
634
+ and maximum range. Arguments "value", "lbound" and "ubound" must
635
+ be used when this style of binning is used.
636
+ All values less than the minimum are put into bin 0, while all
637
+ values greater than the maximum are put into bin N+1.
638
+ * "boundaries":
639
+ This style allows user to specify bins with boundaries.
640
+ To specify boundaries one should use keyword arguments as:
641
+ b1 --> To specify first boundary.
642
+ b2 --> To specify second boundary.
643
+ b3 --> To specify third boundary.
644
+ ...
645
+ bN --> To specify Nth boundary.
646
+ All values less than the first boundary value are put into bin 0,
647
+ while all values greater than the last boundary value are put into
648
+ bin N+1.
649
+ See "kwargs" description below for more details on how these
650
+ arguments must be used.
651
+ * "quantiles":
652
+ This style allows the user to specify evenly-distributed bins.
653
+ Argument "value" must be used when this style of binning is used.
654
+ * "width":
655
+ This style allows the user to specify bins with widths. Argument
656
+ "value" must be used when this style of binning is used.
657
+ Default Value: 'bins'
658
+ Types: str
659
+
660
+ value:
661
+ Optional Argument.
662
+ Specifies the value to be used for bin code transformations.
663
+ When the bin style is:
664
+ * 'bins' or 'binswithboundaries', the argument specifies the number of bins.
665
+ * 'quantiles', the argument specifies the number of quantiles.
666
+ * 'width', the argument specifies the bin width.
667
+ Note:
668
+ Ignored when style is 'boundaries'.
669
+ Default Value: 10
670
+ Types: int
671
+
672
+ lbound:
673
+ Optional Argument.
674
+ Specifies the minimum boundary value for 'binswithboundaries' style.
675
+ Notes:
676
+ 1. Ignored when style is not 'binswithboundaries'.
677
+ 2. If date values are entered as a string, the keyword 'DATE' must
678
+ precede the date value, and the value must not be enclosed in
679
+ single quotes. Alternatively, pass a datetime.date object.
680
+ For example,
681
+ value='DATE 1987-06-09'
682
+ value=date(1987, 6, 9)
683
+ Types: int, float, str, datetime.date
684
+
685
+ ubound:
686
+ Optional Argument.
687
+ Specifies the maximum boundary value for 'binswithboundaries' style.
688
+ Notes:
689
+ 1. Ignored when style is not 'binswithboundaries'.
690
+ 2. If date values are entered as a string, the keyword 'DATE' must
691
+ precede the date value, and the value must not be enclosed in
692
+ single quotes. Alternatively, pass a datetime.date object.
693
+ For example,
694
+ value='DATE 1987-06-09'
695
+ value=date(1987, 6, 9)
696
+ Types: int, float, str, datetime.date
697
+
698
+ out_columns:
699
+ Optional Argument.
700
+ Specifies the names of the output columns.
701
+ Note:
702
+ Number of elements in "columns" and "out_columns" must be the same.
703
+ Types: str or list of str
704
+
705
+ datatype:
706
+ Optional Argument.
707
+ Specifies the name of the intended datatype of the output column.
708
+ Intended data types for the output column can be specified using either the
709
+ teradatasqlalchemy types or the permitted strings mentioned below:
710
+ -------------------------------------------------------------------
711
+ | If intended SQL Data Type is | Permitted Value to be passed is |
712
+ |-------------------------------------------------------------------|
713
+ | bigint | bigint |
714
+ | byteint | byteint |
715
+ | char(n) | char,n |
716
+ | date | date |
717
+ | decimal(m,n) | decimal,m,n |
718
+ | float | float |
719
+ | integer | integer |
720
+ | number(*) | number |
721
+ | number(n) | number,n |
722
+ | number(*,n) | number,*,n |
723
+ | number(n,n) | number,n,n |
724
+ | smallint | smallint |
725
+ | time(p) | time,p |
726
+ | timestamp(p) | timestamp,p |
727
+ | varchar(n) | varchar,n |
728
+ --------------------------------------------------------------------
729
+ Notes:
730
+ 1. Argument is ignored if "columns" argument is not used.
731
+ 2. char without a size is not supported.
732
+ 3. number(*) does not include the * in its datatype format.
733
+ Examples:
734
+ 1. If intended datatype for the output column is "bigint", then
735
+ pass string "bigint" to the argument as shown below:
736
+ datatype="bigint"
737
+ 2. If intended datatype for the output column is "decimal(3,5)", then
738
+ pass string "decimal,3,5" to the argument as shown below:
739
+ datatype="decimal,3,5"
740
+ Types: str, BIGINT, BYTEINT, CHAR, DATE, DECIMAL, FLOAT, INTEGER, NUMBER, SMALLINT, TIME,
741
+ TIMESTAMP, VARCHAR.
742
+
743
+ fillna:
744
+ Optional Argument.
745
+ Specifies whether the null replacement/missing value treatment should
746
+ be performed with binning or not. Output of FillNa() can be passed to
747
+ this argument.
748
+ Note:
749
+ If the FillNa object is created with its arguments "columns",
750
+ "out_columns" and "datatype", then the values passed in those FillNa()
751
+ arguments are ignored; only the nullstyle information is used.
752
+ Types: FillNa
753
+
754
+ kwargs:
755
+ Specifies the keyword arguments to provide the boundaries required
756
+ for binning with bin style 'boundaries'.
757
+ To specify boundaries one should use keyword arguments as:
758
+ b1 --> To specify first boundary.
759
+ b2 --> To specify second boundary.
760
+ b3 --> To specify third boundary.
761
+ ...
762
+ bN --> To specify Nth boundary.
763
+ Notes:
764
+ 1. When keyword arguments are used, make sure to specify boundaries
765
+ in sequence, i.e., b1, b2, b3, ...
766
+ If a keyword argument in the sequence is missing, an error is
767
+ raised.
768
+ 2. Keyword arguments specified for the boundaries must start with 'b'.
769
+ 3. First boundary must always be specified with "b1" argument.
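+ Example (a sketch): three numeric boundaries could be supplied as
+ Binning(style="boundaries", b1=0, b2=50, b3=100, columns="stockprice")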
770
+ Types: int, float, str, datetime.date
771
+
772
+ RETURNS:
773
+ An instance of Binning class.
774
+
775
+ RAISES:
776
+ TeradataMlException, TypeError, ValueError
777
+
778
+ EXAMPLE:
779
+ # Note:
780
+ # To run any transformation, the user needs to use the Transform() function
781
+ # from the Vantage Analytic Library.
782
+ # To do so, import valib first and set the "val_install_location".
783
+ >>> from teradataml import configure, DataFrame, Binning, FillNa, load_example_data, valib
784
+ >>> configure.val_install_location = "SYSLIB"
785
+ >>>
786
+
787
+ # Load example data.
788
+ >>> load_example_data("movavg", "ibm_stock")
789
+ >>>
790
+
791
+ # Create the required teradataml DataFrame.
792
+ >>> ibm_stock = DataFrame.from_table("ibm_stock")
793
+ >>> ibm_stock
794
+ name period stockprice
795
+ id
796
+ 183 ibm 62/02/07 552.0
797
+ 202 ibm 62/03/07 548.0
798
+ 181 ibm 62/02/05 551.0
799
+ 242 ibm 62/05/02 482.0
800
+ 364 ibm 62/10/25 331.0
801
+ 221 ibm 62/04/03 513.0
802
+ 38 ibm 61/07/11 473.0
803
+ 366 ibm 62/10/29 352.0
804
+ 326 ibm 62/08/30 387.0
805
+ 61 ibm 61/08/11 497.0
806
+ >>>
807
+
808
+ # Example 1: Binning is carried out with "bins" style, i.e. equal-width
809
+ # binning, with 5 bins. Null replacement is also combined
810
+ # with binning.
811
+ # "key_columns" argument must be used with Transform() function,
812
+ # when null replacement is being done.
813
+ >>> fn = FillNa(style="literal", value=0)
814
+ >>> bins = Binning(style="bins", value=5, columns="stockprice", fillna=fn)
815
+
816
+ # Execute Transform() function.
817
+ >>> obj = valib.Transform(data=ibm_stock,
818
+ ... bins=bins,
819
+ ... key_columns="id")
820
+ >>> obj.result
821
+ id stockprice
822
+ 0 263 1
823
+ 1 324 2
824
+ 2 303 2
825
+ 3 99 5
826
+ 4 36 3
827
+ 5 97 5
828
+ 6 160 5
829
+ 7 59 4
830
+ 8 19 4
831
+ 9 122 5
832
+ >>>
833
+
834
+
835
+ # Example 2: Binning is carried out with multiple styles.
836
+
837
+ # 'binswithboundaries' style:
838
+ # Equal-width bins with a user-specified minimum and maximum range on 'period'
839
+ # column. The resultant output returns the value with the same column name.
840
+ # The number of bins created is 5.
841
+ >>> bins_1 = Binning(style="binswithboundaries",
842
+ ... value=5,
843
+ ... lbound="DATE 1962-01-01",
844
+ ... ubound="DATE 1962-06-01",
845
+ ... columns="period")
846
+ >>>
847
+
848
+ # 'boundaries' style:
849
+ # Bins created with user-specified boundaries on 'period' column. Resultant
850
+ # column is named 'period2'. Three boundaries are specified with arguments
851
+ # "b1", "b2" and "b3". When using this style, keyword argument names must
852
+ # start with 'b' and they should be in sequence b1, b2, ..., bN.
853
+ >>> bins_2 = Binning(style="boundaries",
854
+ ... b1="DATE 1962-01-01",
855
+ ... b2="DATE 1962-06-01",
856
+ ... b3="DATE 1962-12-31",
857
+ ... columns="period",
858
+ ... out_columns="period2")
859
+ >>>
860
+
861
+ # Execute Transform() function.
862
+ >>> obj = valib.Transform(data=ibm_stock,
863
+ ... bins=[bins_1, bins_2])
864
+ >>> obj.result
865
+ id period period2
866
+ 0 223 4 1
867
+ 1 345 6 2
868
+ 2 120 0 0
869
+ 3 343 6 2
870
+ 4 57 0 0
871
+ 5 118 0 0
872
+ 6 200 3 1
873
+ 7 80 0 0
874
+ 8 162 1 1
875
+ 9 40 0 0
876
+ >>>
877
+
878
+
879
+ # Example 3: Binning is carried out with multiple styles 'quantiles' and
880
+ # 'width'.
881
+
882
+ # 'quantiles' style:
883
+ # Evenly distributed bins on 'stockprice' column. Resultant output returns
884
+ # the column with name 'stockprice_q'. The number of quantiles considered
885
+ # here is 4.
886
+ >>> bins_1 = Binning(style="quantiles",
887
+ ... value=4,
888
+ ... out_columns="stockprice_q",
889
+ ... columns="stockprice")
890
+ >>>
891
+
892
+ # 'width' style:
893
+ # Bins with user specified width on 'stockprice' column. Resultant output
894
+ # returns the column with name 'stockprice_w'. Width considered for binning
895
+ # is 5.
896
+ >>> bins_2 = Binning(style="width",
897
+ ... value=5,
898
+ ... out_columns="stockprice_w",
899
+ ... columns="stockprice")
900
+ >>>
901
+
902
+ # Execute Transform() function.
903
+ >>> obj = valib.Transform(data=ibm_stock,
904
+ ... bins=[bins_1, bins_2])
905
+ >>> obj.result
906
+ id stockprice_q stockprice_w
907
+ 0 183 4 50
908
+ 1 202 3 49
909
+ 2 181 4 50
910
+ 3 242 2 36
911
+ 4 364 1 6
912
+ 5 221 3 42
913
+ 6 38 2 34
914
+ 7 366 1 10
915
+ 8 326 1 17
916
+ 9 61 3 39
917
+ >>>
918
+ """
919
+ # Call super()
920
+ super().__init__(columns=columns, out_columns=out_columns, datatype=datatype,
921
+ columns_optional=False)
922
+
923
+ # Initialize style and value as data members.
924
+ self.style = style
925
+ self.value = value
926
+ self.lbound = lbound
927
+ self.ubound = ubound
928
+ self.fillna = fillna
929
+ self.kwargs = kwargs
930
+
931
+ # Validations
932
+ arg_info_matrix = []
933
+ permitted_styles = ["BINS", "BINSWITHBOUNDARIES", "BOUNDARIES", "QUANTILES",
934
+ "WIDTH"]
935
+ arg_info_matrix.append(["style", self.style, True, str, True, permitted_styles])
936
+ arg_info_matrix.append(["value", self.value, True, int])
937
+ arg_info_matrix.append(["lbound", self.lbound, True, (bool, int, float, str, date)])
938
+ arg_info_matrix.append(["ubound", self.ubound, True, (bool, int, float, str, date)])
939
+ arg_info_matrix.append(["fillna", self.fillna, True, FillNa])
940
+ # Note:
941
+ # Validations for "columns", "out_columns" and "datatype" are done by super().
942
+ # Other argument validations.
943
+ _Validators._validate_function_arguments(arg_info_matrix)
944
+
945
+ # "value" should be provided when "style" is 'bins'.
946
+ if self.style.upper() in ["BINS", "QUANTILES", "WIDTH"] and self.value is None:
947
+ err_ = Messages.get_message(MessageCodes.DEPENDENT_ARG_MISSING, "value",
948
+ "style={}".format(self.style))
949
+ raise TeradataMlException(err_, MessageCodes.DEPENDENT_ARG_MISSING)
950
+
951
+ # "value", "lbound", "ubound" should be provided when "style" is 'binswithboundaries'.
952
+ if self.style.upper() == "BINSWITHBOUNDARIES":
953
+ if self.value is None:
954
+ err_ = Messages.get_message(MessageCodes.DEPENDENT_ARG_MISSING, "value",
955
+ "style={}".format(self.style))
956
+ raise TeradataMlException(err_, MessageCodes.DEPENDENT_ARG_MISSING)
957
+
958
+ if self.lbound is None:
959
+ err_ = Messages.get_message(MessageCodes.DEPENDENT_ARG_MISSING, "lbound",
960
+ "style={}".format(self.style))
961
+ raise TeradataMlException(err_, MessageCodes.DEPENDENT_ARG_MISSING)
962
+
963
+ if self.ubound is None:
964
+ err_ = Messages.get_message(MessageCodes.DEPENDENT_ARG_MISSING, "ubound",
965
+ "style={}".format(self.style))
966
+ raise TeradataMlException(err_, MessageCodes.DEPENDENT_ARG_MISSING)
967
+
968
+ if self.style.upper() == "BOUNDARIES":
969
+ # Parse kwargs now for "boundaries" style argument.
970
+ # Expected arguments are b1, b2, ..., bN.
971
+ # We start extracting each boundary argument one by one and store
972
+ # its corresponding value so that it can be used later to generate
973
+ # the correct binstyle syntax.
974
+ parse_kwargs = True
975
+ key_num = 1
976
+ self.__boundaries = []
977
+ while parse_kwargs:
978
+ value = self.kwargs.pop("b{}".format(str(key_num)), None)
979
+ kwarg_info_matrix=[["b{}".format(str(key_num)), value,
980
+ True, (int, float, str, date)]]
981
+
982
+ _Validators._validate_function_arguments(kwarg_info_matrix)
983
+
984
+ key_num = key_num + 1
985
+ if value is None:
986
+ parse_kwargs = False
987
+ else:
988
+ if isinstance(value, date):
989
+ value = UtilFuncs._convert_date_to_string(value)
990
+ self.__boundaries.append(value)
991
+
992
+ # If kwargs still has some extra arguments, it means the user has
993
+ # passed an incorrect argument.
994
+ if len(kwargs) != 0:
995
+ err_ = "Boundary keyword arguments for \"boundaries\" binning style " \
996
+ "must be in sequence as b1, b2, ..., bN. Found: " \
997
+ "{}".format(list(kwargs.keys()))
998
+ raise TypeError(err_)
999
+
1000
+ # After parsing kwargs, if the length of self.__boundaries is 0,
1001
+ # raise an error, as boundary values are missing for
1002
+ # this binning style.
1003
+ if len(self.__boundaries) == 0:
1004
+ err_ = Messages.get_message(MessageCodes.DEPENDENT_ARG_MISSING,
1005
+ "b1, b2, ..., bN",
1006
+ "style={}".format(self.style))
1007
+ raise TeradataMlException(err_, MessageCodes.DEPENDENT_ARG_MISSING)
1008
+
1009
+ def _val_sql_syntax(self):
1010
+ """
1011
+ DESCRIPTION:
1012
+ Internal function to return a string representation of binning
1013
+ Transformation as required by SQL.
1014
+
1015
+ PARAMETERS:
1016
+ None.
1017
+
1018
+ RETURNS:
1019
+ String representing SQL syntax for 'bincode' SQL argument.
1020
+
1021
+ RAISES:
1022
+ None.
1023
+
1024
+ EXAMPLE:
1025
+ self._val_sql_syntax()
1026
+ """
1027
+ # Generate and add syntax for "binstyle" SQL argument.
1028
+ if self.style.upper() in ["BINS", "QUANTILES", "WIDTH"]:
1029
+ binstyle_arg2 = self.value
1030
+ elif self.style.upper() == "BINSWITHBOUNDARIES":
1031
+ # If 'lbound' is provided as a date object, convert it to string format.
1032
+ if isinstance(self.lbound, date):
1033
+ self.lbound = UtilFuncs._convert_date_to_string(self.lbound)
1034
+
1035
+ # If 'ubound' is provided as a date object, convert it to string format.
1036
+ if isinstance(self.ubound, date):
1037
+ self.ubound = UtilFuncs._convert_date_to_string(self.ubound)
1038
+
1039
+ binstyle_arg2 = "{}, {}, {}".format(self.value, self.lbound, self.ubound)
1040
+ else:
1041
+ binstyle_arg2 = ", ".join([str(v) for v in self.__boundaries])
1042
+
1043
+ ret_value = "binstyle({}, {})".format(self.style.lower(), binstyle_arg2)
1044
+
1045
+ # Generate and add syntax for "columns" and "datatype" SQL arguments.
1046
+ columns_fmt = self._val_transformation_fmt()
1047
+ ret_value = "{}, {}".format(ret_value, columns_fmt)
1048
+
1049
+ if self.fillna:
1050
+ ret_value = "{}, {}".format(ret_value, self.fillna._val_nullstyle_fmt())
1051
+
1052
+ return "{" + ret_value + "}"
1053
+
1054
+
1055
+ class Derive(object):
1056
+ """ Class to represent derive transformation technique. """
1057
+
1058
+ def __init__(self, formula, columns, out_column, datatype=None, fillna=None):
1059
+ """
1060
+ DESCRIPTION:
1061
+ The Derive transformation requires the free-form transformation be specified
1062
+ as a formula using the following operators, arguments, and functions:
1063
+ +, -, **, *, /, %, (, ), x, y, z, abs, exp, ln, log, sqrt
1064
+ The arguments x, y, and z can only assume the value of an input column.
1065
+ An implied multiply operator is automatically inserted when a number, argument
1066
+ (x, y, z), or parenthesis is immediately followed by an argument or parenthesis.
1067
+ For example,
1068
+ 4x means 4*x, xy means x*y, and x(x+1) is equivalent to x*(x+1).
1069
+
1070
+ An example formula for the quadratic equation is below.
1071
+ formula="(-y+sqrt(y**2-4xz))/(2x)"
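+ Note that x, y and z refer, in order, to the columns passed in the
+ "columns" argument (see the examples below).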
1072
+
1073
+ Note:
1074
+ Output of this function is passed to "derive" argument of "Transform"
1075
+ function from Vantage Analytic Library.
1076
+
1077
+ PARAMETERS:
1078
+ formula:
1079
+ Required Argument.
1080
+ Specifies the free-form transformation required for Derive.
1081
+ Arithmetic formula can be specified as string using following operators,
1082
+ arguments, and functions:
1083
+ +, -, **, *, /, %, (, ), x, y, z, abs, exp, ln, log, sqrt
1084
+ Types: str
1085
+
1086
+ columns:
1087
+ Required Argument.
1088
+ Specifies the names of the columns to use for formula.
1089
+ Types: str or list of str
1090
+
1091
+ out_column:
1092
+ Required Argument.
1093
+ Specifies the name of the output column.
1094
+ Types: str
1095
+
1096
+ datatype:
1097
+ Optional Argument.
1098
+ Specifies the name of the intended datatype of the output column.
1099
+ Intended data types for the output column can be specified using either the
1100
+ teradatasqlalchemy types or the permitted strings mentioned below:
1101
+ -------------------------------------------------------------------
1102
+ | If intended SQL Data Type is | Permitted Value to be passed is |
1103
+ |-------------------------------------------------------------------|
1104
+ | bigint | bigint |
1105
+ | byteint | byteint |
1106
+ | char(n) | char,n |
1107
+ | date | date |
1108
+ | decimal(m,n) | decimal,m,n |
1109
+ | float | float |
1110
+ | integer | integer |
1111
+ | number(*) | number |
1112
+ | number(n) | number,n |
1113
+ | number(*,n) | number,*,n |
1114
+ | number(n,n) | number,n,n |
1115
+ | smallint | smallint |
1116
+ | time(p) | time,p |
1117
+ | timestamp(p) | timestamp,p |
1118
+ | varchar(n) | varchar,n |
1119
+ --------------------------------------------------------------------
1120
+ Notes:
1121
+ 1. Argument is ignored if "columns" argument is not used.
1122
+ 2. char without a size is not supported.
1123
+ 3. number(*) does not include the * in its datatype format.
1124
+ Examples:
1125
+ 1. If intended datatype for the output column is "bigint", then
1126
+ pass string "bigint" to the argument as shown below:
1127
+ datatype="bigint"
1128
+ 2. If intended datatype for the output column is "decimal(3,5)", then
1129
+ pass string "decimal,3,5" to the argument as shown below:
1130
+ datatype="decimal,3,5"
1131
+ Types: str, BIGINT, BYTEINT, CHAR, DATE, DECIMAL, FLOAT, INTEGER, NUMBER, SMALLINT, TIME,
1132
+ TIMESTAMP, VARCHAR.
1133
+
1134
+ fillna:
1135
+ Optional Argument.
1136
+ Specifies whether the null replacement/missing value treatment should
1137
+ be performed with derive or not. Output of FillNa() can be passed to
1138
+ this argument.
1139
+ Note:
1140
+ If the FillNa object is created with its arguments "columns",
1141
+ "out_columns" and "datatype", then values passed in FillNa() arguments
1142
+ are ignored. Only nullstyle information is captured from the same.
1143
+ Types: FillNa
1144
+
1145
+ RETURNS:
1146
+ An instance of Derive class.
1147
+
1148
+ RAISES:
1149
+ TeradataMlException, TypeError, ValueError
1150
+
1151
+ EXAMPLE:
1152
+ # Note:
1153
+ # To run any transformation, user needs to use Transform() function from
1154
+ # Vantage Analytic Library.
1155
+ # To do so import valib first and set the "val_install_location".
1156
+ >>> from teradataml import configure, DataFrame, Derive, FillNa, load_example_data, valib
1157
+ >>> configure.val_install_location = "SYSLIB"
1158
+ >>>
1159
+
1160
+ # Load example data.
1161
+ >>> load_example_data("dataframe", "sales")
1162
+ >>>
1163
+
1164
+ # Create the required DataFrame.
1165
+ >>> sales = DataFrame("sales")
1166
+ >>> sales
1167
+ Feb Jan Mar Apr datetime
1168
+ accounts
1169
+ Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
1170
+ Alpha Co 210.0 200.0 215.0 250.0 04/01/2017
1171
+ Jones LLC 200.0 150.0 140.0 180.0 04/01/2017
1172
+ Yellow Inc 90.0 NaN NaN NaN 04/01/2017
1173
+ Orange Inc 210.0 NaN NaN 250.0 04/01/2017
1174
+ Red Inc 200.0 150.0 140.0 NaN 04/01/2017
1175
+ >>>
1176
+
1177
+ # Example: Includes multiple derive transformations.
1178
+ # Derive transformation 1 is done with 3 variables, x, y, z, to calculate
1179
+ # the total sales for the first quarter for each account.
1180
+ >>> fn_1 = FillNa(style='literal', value=0)
1181
+ >>> dr_1 = Derive(formula="x+y+z", columns=["Jan", "Feb", "Mar"],
1182
+ ... out_column="q1_sales", fillna=fn_1)
1183
+ >>>
1184
+
1185
+ # Derive transformation 2 is done with 2 variables, x, y, to calculate
1186
+ # the sales growth from the month of Jan to Feb.
1187
+ >>> fn_2 = FillNa(style='median')
1188
+ >>> dr_2 = Derive(formula="((y-x)/x)*100", columns=["Jan", "Feb"],
1189
+ ... out_column="feb_growth", fillna=fn_2, datatype='bigint')
1190
+ >>>
1191
+
1192
+ # Execute Transform() function.
1193
+ >>> obj = valib.Transform(data=sales, derive=[dr_1, dr_2], key_columns="accounts")
1194
+ >>> obj.result
1195
+ accounts q1_sales feb_growth
1196
+ 0 Alpha Co 625.0 4
1197
+ 1 Red Inc 490.0 33
1198
+ 2 Orange Inc NaN 40
1199
+ 3 Jones LLC 490.0 33
1200
+ 4 Yellow Inc NaN -40
1201
+ 5 Blue Inc 235.0 79
1202
+ >>>
1203
+ """
1204
+ # Initialize arguments as data members.
1205
+ self.formula = formula
1206
+ self.columns = columns
1207
+ self.out_column = out_column
1208
+ self.datatype = datatype
1209
+ self.fillna = fillna
1210
+
1211
+ # Validations
1212
+ arg_info_matrix = []
1213
+ arg_info_matrix.append(["formula", self.formula, False, str, True])
1214
+ arg_info_matrix.append(["columns", self.columns, False, (str, list), True])
1215
+ arg_info_matrix.append(["out_column", self.out_column, False, str, True])
1216
+ arg_info_matrix.append(["datatype", self.datatype, True, _SuppArgTypes.VAL_ARG_DATATYPE, True])
1217
+ arg_info_matrix.append(["fillna", self.fillna, True, FillNa])
1218
+
1219
+ _Validators._validate_function_arguments(arg_info_matrix)
1220
+
1221
+ def _val_sql_syntax(self):
1222
+ """
1223
+ DESCRIPTION:
1224
+ Internal function to return a string representation of derive
1225
+ Transformation as required by SQL.
1226
+
1227
+ PARAMETERS:
1228
+ None.
1229
+
1230
+ RETURNS:
1231
+ String representing SQL syntax for 'derive' SQL argument.
1232
+
1233
+ RAISES:
1234
+ None.
1235
+
1236
+ EXAMPLE:
1237
+ self._val_sql_syntax()
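+
+ # Illustration only (derived from the formatting logic below): for
+ # Derive(formula="x+y", columns=["Jan", "Feb"], out_column="s") with no
+ # "datatype" and no "fillna", this returns:
+ # "{formula(''x+y''), arguments(Jan, Feb), outputname(s)}"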
1238
+ """
1239
+ derive_fmt = "formula(''{}''), arguments({}), outputname({})"
1240
+ arguments = ", ".join(UtilFuncs._as_list(self.columns))
1241
+ ret_value = derive_fmt.format(self.formula, arguments, self.out_column)
1242
+
1243
+ # Generate and add syntax for "datatype" SQL argument.
1244
+ if self.datatype is not None:
1245
+ self.datatype = self.datatype if isinstance(self.datatype, str) else \
1246
+ _DtypesMappers.TDSQLALCHEMY_DATATYPE_TO_VAL_STRING_MAPPER\
1247
+ [type(self.datatype)](self.datatype)
1248
+ ret_value = "{}, datatype({})".format(ret_value, self.datatype)
1249
+
1250
+ # Generate and add syntax for "nullstyle", a SQL arguments.
1251
+ if self.fillna:
1252
+ ret_value = "{}, {}".format(ret_value, self.fillna._val_nullstyle_fmt())
1253
+
1254
+ # Return the SQL syntax for "derive", a SQL argument.
1255
+ return "{" + ret_value + "}"
1256
+
1257
+
1258
+ class OneHotEncoder(_Transformations):
1259
+ """ Class to represent one hot encoding transformation technique. """
1260
+
1261
+ def __init__(self, values, columns, style="dummy", reference_value=None,
1262
+ out_columns=None, datatype=None, fillna=None):
1263
+ """
1264
+ DESCRIPTION:
1265
+ One hot encoding is useful when a categorical data element must be re-expressed
1266
+ as one or more numeric data elements, creating a binary numeric field for
1267
+ each categorical data value. One hot encoding supports character, numeric,
1268
+ and date type columns.
1269
+ One hot encoding is offered in two forms: dummy-coding and contrast-coding.
1270
+ * In dummy-coding, a new column is produced for each listed value, with
1271
+ a value of 0 or 1 depending on whether that value is assumed by the
1272
+ original column. If a column assumes n values, new columns can be
1273
+ created for all n values, (or for only n-1 values, because the nth
1274
+ column is perfectly correlated with the first n-1 columns).
1275
+ * Alternately, given a list of values to contrast-code along with a
1276
+ reference value, a new column is produced for each listed value, with
1277
+ a value of 0 or 1 depending on whether that value is assumed by the
1278
+ original column, or a value of -1 if that original value is equal to
1279
+ the reference value.
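+
+ For illustration, here is a hypothetical pure-Python sketch (not part
+ of teradataml) of the two coding styles applied to a single row value:
+ >>> def dummy_code(value, listed):
+ ...     return [1 if value == v else 0 for v in listed]
+ >>> def contrast_code(value, listed, reference):
+ ...     return [1 if value == v else (-1 if value == reference else 0)
+ ...             for v in listed]
+ >>> dummy_code("Beginner", ["Novice", "Advanced", "Beginner"])
+ [0, 0, 1]
+ >>> contrast_code("Novice", ["Advanced", "Beginner"], reference="Novice")
+ [-1, -1]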
1280
+
1281
+ Note:
1282
+ Output of this function is passed to "one_hot_encode" argument of
1283
+ "Transform" function from Vantage Analytic Library.
1284
+
1285
+ PARAMETERS:
1286
+ values:
1287
+ Required Argument.
1288
+ Specifies the values to code and optionally the name of the
1289
+ resulting output column.
1290
+ Note:
1291
+ 1. If date values are entered as strings, the keyword 'DATE' must precede
1292
+ the date value, and the value must not be enclosed in single quotes;
1293
+ alternatively, pass a datetime.date object.
1294
+ For example,
1295
+ value='DATE 1987-06-09'
1296
+ value=date(1987, 6, 9)
1297
+ 2. Use a dict to pass value when result output column is to be named.
1298
+ key of the dictionary must be the value to code and value must be
1299
+ either None, in case result output column is not to be named or a
1300
+ string if it is to be named.
1301
+ For example,
1302
+ values = {"Male": M, "Female": None}
1303
+ In the example above,
1304
+ - we would like to name the output column 'M' for the one hot
1305
+ encoded values of "Male", and
1306
+ - for the one hot encoded values of "Female" we would like the
1307
+ output column name to be derived from "Female" itself, thus
1308
+ None is passed as the value.
1309
+ Types: bool, float, int, str, dict, datetime.date or list of booleans, floats, integers,
1310
+ strings, datetime.date
1311
+
1312
+ columns:
1313
+ Required Argument.
1314
+ Specifies the name of the column. Value passed to this argument
1315
+ also plays a crucial role in determining the output column name.
1316
+ Types: str
1317
+
1318
+ style:
1319
+ Optional Argument.
1320
+ Specifies the one hot encoding style to use.
1321
+ Permitted Values: 'dummy', 'contrast'
1322
+ Default Value: 'dummy'
1323
+ Types: str
1324
+
1325
+ reference_value:
1326
+ Required Argument when "style" is 'contrast', ignored otherwise.
1327
+ Specifies the reference value to use for 'contrast' style. If original
1328
+ value in the column is equal to the reference value then -1 is returned
1329
+ for the same.
1330
+ Types: bool, int, float, str, datetime.date
1331
+
1332
+ out_columns:
1333
+ Optional Argument.
1334
+ Specifies the name of the output column. Value passed to this argument
1335
+ also plays a crucial role in determining the output column name.
1336
+ Types: str
1337
+
1338
+ datatype:
1339
+ Optional Argument.
1340
+ Specifies the name of the intended datatype of the output column.
1341
+ Intended data types for the output column can be specified using either the
1342
+ teradatasqlalchemy types or the permitted strings mentioned below:
1343
+ -------------------------------------------------------------------
1344
+ | If intended SQL Data Type is | Permitted Value to be passed is |
1345
+ |-------------------------------------------------------------------|
1346
+ | bigint | bigint |
1347
+ | byteint | byteint |
1348
+ | char(n) | char,n |
1349
+ | date | date |
1350
+ | decimal(m,n) | decimal,m,n |
1351
+ | float | float |
1352
+ | integer | integer |
1353
+ | number(*) | number |
1354
+ | number(n) | number,n |
1355
+ | number(*,n) | number,*,n |
1356
+ | number(n,n) | number,n,n |
1357
+ | smallint | smallint |
1358
+ | time(p) | time,p |
1359
+ | timestamp(p) | timestamp,p |
1360
+ | varchar(n) | varchar,n |
1361
+ --------------------------------------------------------------------
1362
+ Notes:
1363
+ 1. Argument is ignored if "columns" argument is not used.
1364
+ 2. char without a size is not supported.
1365
+ 3. number(*) does not include the * in its datatype format.
1366
+ Examples:
1367
+ 1. If intended datatype for the output column is "bigint", then
1368
+ pass string "bigint" to the argument as shown below:
1369
+ datatype="bigint"
1370
+ 2. If intended datatype for the output column is "decimal(3,5)", then
1371
+ pass string "decimal,3,5" to the argument as shown below:
1372
+ datatype="decimal,3,5"
1373
+ Types: str, BIGINT, BYTEINT, CHAR, DATE, DECIMAL, FLOAT, INTEGER, NUMBER, SMALLINT, TIME,
1374
+ TIMESTAMP, VARCHAR.
1375
+
1376
+ fillna:
1377
+ Optional Argument.
1378
+ Specifies whether the null replacement/missing value treatment should
1379
+ be performed with one hot encoding or not. Output of FillNa() can be
1380
+ passed to this argument.
1381
+ Note:
1382
+ If the FillNa object is created with its arguments "columns",
1383
+ "out_columns" and "datatype", then values passed in FillNa() arguments
1384
+ are ignored. Only nullstyle information is captured from the same.
1385
+ Types: FillNa
1386
+
1387
+ NOTES:
1388
+ Output column names for the transformation using Transform() function depends
1389
+ on "values", "columns" and "out_columns" arguments. Here is how output column
1390
+ names are determined:
1391
+ 1. If "values" is not dictionary:
1392
+ a. If "out_columns" is not passed, then output column is formed
1393
+ using the value in "values" and column name passed to "columns".
1394
+ For example,
1395
+ If values=["val1", "val2"] and columns="col"
1396
+ then, output column names are:
1397
+ 'val1_col' and 'val2_col'
1398
+ b. If "out_columns" is passed, then output column is formed
1399
+ using the value in "values" and column name passed to "out_columns".
1400
+ For example,
1401
+ If values=["val1", "val2"], columns="col", and
1402
+ out_columns="ocol" then, output column names are:
1403
+ 'val1_ocol' and 'val2_ocol'
1404
+ 2. If "values" is a dictionary:
1405
+ a. If value in a dictionary is not None, then that value is used
1406
+ as output column name.
1407
+ For example:
1408
+ If values = {"val1": "v1"} then output column name is "v1".
1409
+ b. If value in a dictionary is None, then rules specified in point 1
1410
+ are applied to determine the output column name.
1411
+
1412
+ RETURNS:
1413
+ An instance of OneHotEncoder class.
1414
+
1415
+ RAISES:
1416
+ TeradataMlException, TypeError, ValueError
1417
+
1418
+ EXAMPLE:
1419
+ # Note:
1420
+ # To run any transformation, user needs to use Transform() function from
1421
+ # Vantage Analytic Library.
1422
+ # To do so import valib first and set the "val_install_location".
1423
+ >>> from teradataml import configure, DataFrame, OneHotEncoder, FillNa, load_example_data, valib
1424
+ >>> configure.val_install_location = "SYSLIB"
1425
+ >>>
1426
+
1427
+ # Load example data.
1428
+ >>> load_example_data("dataframe", "admissions_train")
1429
+ >>>
1430
+
1431
+ # Create the required DataFrame.
1432
+ >>> df = DataFrame("admissions_train")
1433
+ >>> df
1434
+ masters gpa stats programming admitted
1435
+ id
1436
+ 13 no 4.00 Advanced Novice 1
1437
+ 26 yes 3.57 Advanced Advanced 1
1438
+ 5 no 3.44 Novice Novice 0
1439
+ 19 yes 1.98 Advanced Advanced 0
1440
+ 15 yes 4.00 Advanced Advanced 1
1441
+ 40 yes 3.95 Novice Beginner 0
1442
+ 7 yes 2.33 Novice Novice 1
1443
+ 22 yes 3.46 Novice Beginner 0
1444
+ 36 no 3.00 Advanced Novice 0
1445
+ 38 yes 2.65 Advanced Beginner 1
1446
+ >>>
1447
+
1448
+ # Example 1: Encode all values 'Novice', 'Advanced', and 'Beginner'
1449
+ # in "programming" column using "dummy" style.
1450
+ >>> dc = OneHotEncoder(values=["Novice", "Advanced", "Beginner"], columns="programming")
1451
+
1452
+ # Execute Transform() function.
1453
+ >>> obj = valib.Transform(data=df, one_hot_encode=dc, key_columns="id")
1454
+ >>> obj.result
1455
+ id Novice_programming Advanced_programming Beginner_programming
1456
+ 0 5 1 0 0
1457
+ 1 34 0 0 1
1458
+ 2 13 1 0 0
1459
+ 3 40 0 0 1
1460
+ 4 22 0 0 1
1461
+ 5 19 0 1 0
1462
+ 6 36 1 0 0
1463
+ 7 15 0 1 0
1464
+ 8 7 1 0 0
1465
+ 9 17 0 1 0
1466
+ >>>
1467
+
1468
+
1469
+ # Example 2: Encode all values 'Novice', 'Advanced', and 'Beginner'
1470
+ # in "programming" column using "dummy" style. Also, pass
1471
+ # "out_columns" argument, to control the name of the output column.
1472
+ >>> dc = OneHotEncoder(style="dummy", values=["Novice", "Advanced", "Beginner"],
1473
+ ... columns="programming", out_columns="prog")
1474
+
1475
+ # Execute Transform() function.
1476
+ >>> obj = valib.Transform(data=df, one_hot_encode=dc, key_columns="id")
1477
+ >>> obj.result
1478
+ id Novice_prog Advanced_prog Beginner_prog
1479
+ 0 15 0 1 0
1480
+ 1 7 1 0 0
1481
+ 2 22 0 0 1
1482
+ 3 17 0 1 0
1483
+ 4 13 1 0 0
1484
+ 5 38 0 0 1
1485
+ 6 26 0 1 0
1486
+ 7 5 1 0 0
1487
+ 8 34 0 0 1
1488
+ 9 40 0 0 1
1489
+ >>>
1490
+
1491
+
1492
+ # Example 3: Encode all values 'Novice', 'Advanced', and 'Beginner'
1493
+ # in "programming" column using "dummy" style. Example shows
1494
+ # why and how to pass values using a dictionary. By passing a dictionary,
1495
+ # we can control the names of the output columns.
1496
+ # In this example, we would like to name the output column for
1497
+ # value 'Advanced' as 'Adv', 'Beginner' as 'Beg' and for 'Novice'
1498
+ # we would like to use the default naming mechanism.
1499
+ >>> values = {"Novice": None, "Advanced": "Adv", "Beginner": "Beg"}
1500
+ >>> dc = OneHotEncoder(style="dummy", values=values, columns="programming")
1501
+
1502
+ # Execute Transform() function.
1503
+ >>> obj = valib.Transform(data=df, one_hot_encode=dc, key_columns="id")
1504
+ >>> obj.result
1505
+ id Novice_programming Adv Beg
1506
+ 0 13 1 0 0
1507
+ 1 26 0 1 0
1508
+ 2 5 1 0 0
1509
+ 3 19 0 1 0
1510
+ 4 15 0 1 0
1511
+ 5 40 0 0 1
1512
+ 6 7 1 0 0
1513
+ 7 22 0 0 1
1514
+ 8 36 1 0 0
1515
+ 9 38 0 0 1
1516
+ >>>
1517
+
1518
+
1519
+ # Example 4: Encode all values 'Novice', 'Advanced', and 'Beginner'
1520
+ # in "programming" column using "dummy" style.
1521
+ # Example shows controlling the output column names with a dictionary
1522
+ # and the "out_columns" argument.
1523
+ # In this example, we would like to name the output column for
1524
+ # value 'Advanced' as 'Adv', 'Beginner' as 'Beg', 'Novice' as 'Nov_prog'.
1525
+ >>> values = {"Novice": None, "Advanced": "Adv", "Beginner": "Beg"}
1526
+ >>> dc = OneHotEncoder(style="dummy", values=values, columns="programming",
1527
+ ... out_columns="prog")
1528
+
1529
+ # Execute Transform() function.
1530
+ >>> obj = valib.Transform(data=df, one_hot_encode=dc, key_columns="id")
1531
+ >>> obj.result
1532
+ id Novice_prog Adv Beg
1533
+ 0 15 0 1 0
1534
+ 1 7 1 0 0
1535
+ 2 22 0 0 1
1536
+ 3 17 0 1 0
1537
+ 4 13 1 0 0
1538
+ 5 38 0 0 1
1539
+ 6 26 0 1 0
1540
+ 7 5 1 0 0
1541
+ 8 34 0 0 1
1542
+ 9 40 0 0 1
1543
+ >>>
1544
+
1545
+
1546
+ # Example 5: Encode 'yes' value in "masters" column using "contrast" style
1547
+ # with reference value as 0.
1548
+ >>> dc = OneHotEncoder(style="contrast", values="yes", reference_value=0,
1549
+ ... columns="masters")
1550
+
1551
+ # Execute Transform() function.
1552
+ >>> obj = valib.Transform(data=df, one_hot_encode=dc, key_columns="id")
1553
+ >>> obj.result
1554
+ id yes_masters
1555
+ 0 15 1
1556
+ 1 7 1
1557
+ 2 22 1
1558
+ 3 17 0
1559
+ 4 13 0
1560
+ 5 38 1
1561
+ 6 26 1
1562
+ 7 5 0
1563
+ 8 34 1
1564
+ 9 40 1
1565
+ >>>
1566
+
1567
+
1568
+ # Example 6: Encode all values in "programming" column using "contrast" style
1569
+ # with reference_value as 'Advanced'.
1570
+ >>> values = {"Advanced": "Adv", "Beginner": "Beg", "Novice": "Nov"}
1571
+ >>> dc = OneHotEncoder(style="contrast", values=values, reference_value="Advanced",
1572
+ ... columns="programming")
1573
+
1574
+ # Execute Transform() function.
1575
+ >>> obj = valib.Transform(data=df, one_hot_encode=dc, key_columns="id")
1576
+ >>> obj.result
1577
+ id Adv Beg Nov
1578
+ 0 15 1 -1 -1
1579
+ 1 7 0 0 1
1580
+ 2 22 0 1 0
1581
+ 3 17 1 -1 -1
1582
+ 4 13 0 0 1
1583
+ 5 38 0 1 0
1584
+ 6 26 1 -1 -1
1585
+ 7 5 0 0 1
1586
+ 8 34 0 1 0
1587
+ 9 40 0 1 0
1588
+ >>>
1589
+
1590
+
1591
+ # Example 7: Example shows combining multiple one hot encoding styles on
1592
+ # different columns.
1593
+
1594
+ # Encode all values in 'programming' column using 'dummy' encoding style.
1595
+ >>> dc_prog_dummy = OneHotEncoder(values=["Novice", "Advanced", "Beginner"],
1596
+ ... columns="programming", out_columns="prog")
1597
+ >>>
1598
+
1599
+ # Encode all values in 'stats' column using 'dummy' encoding style.
1600
+ # Also, combine it with null replacement.
1601
+ >>> values = {"Advanced": "Adv", "Beginner": "Beg"}
1602
+ >>> fillna = FillNa("literal", "Advanced")
1603
+ >>> dc_stats_dummy = OneHotEncoder(values=values, columns="stats", fillna=fillna)
1604
+ >>>
1605
+
1606
+ # Encode 'yes' in 'masters' column using 'contrast' encoding style.
1607
+ # Reference value used is 'no'.
1608
+ >>> dc_mast_contrast = OneHotEncoder(style="contrast", values="yes", reference_value="no",
1609
+ ... columns="masters")
1610
+ >>>
1611
+
1612
+ # Encode all values in 'programming' column using 'contrast' encoding style.
1613
+ # Reference value used is 'Advanced'.
1614
+ >>> dc_prog_contrast = OneHotEncoder(style="contrast",
1615
+ ... values=["Novice", "Advanced", "Beginner"],
1616
+ ... reference_value="Advanced",
1617
+ ... columns="programming")
1618
+ >>>
1619
+
1620
+ # Execute Transform() function.
1621
+ >>> obj = valib.Transform(data=df,
1622
+ ... one_hot_encode=[dc_prog_dummy, dc_stats_dummy,
1623
+ ... dc_mast_contrast, dc_prog_contrast],
1624
+ ... key_columns="id")
1625
+ >>> obj.result
1626
+ id Novice_prog Advanced_prog Beginner_prog Adv Beg yes_masters Novice_programming Advanced_programming Beginner_programming
1627
+ 0 13 1 0 0 1 0 -1 1 0 0
1628
+ 1 26 0 1 0 1 0 1 -1 1 -1
1629
+ 2 5 1 0 0 0 0 -1 1 0 0
1630
+ 3 19 0 1 0 1 0 1 -1 1 -1
1631
+ 4 15 0 1 0 1 0 1 -1 1 -1
1632
+ 5 40 0 0 1 0 0 1 0 0 1
1633
+ 6 7 1 0 0 0 0 1 1 0 0
1634
+ 7 22 0 0 1 0 0 1 0 0 1
1635
+ 8 36 1 0 0 1 0 -1 1 0 0
1636
+ 9 38 0 0 1 1 0 1 0 0 1
1637
+ >>>
1638
+ """
1639
+ # Initialize arguments as data members.
1640
+ self.style = style
1641
+ self.values = values
1642
+ self.reference_value = reference_value
1643
+ self.fillna = fillna
1644
+ self.columns = columns
1645
+ self.out_columns = out_columns
1646
+
1647
+ # Validations
1648
+ arg_info_matrix = []
1649
+ permitted_styles = ["DUMMY", "CONTRAST"]
1650
+ arg_info_matrix.append(["style", self.style, True, str, True, permitted_styles])
1651
+ arg_info_matrix.append(["values", self.values, False,
1652
+ (bool, float, int, str, list, dict, date)])
1653
+ arg_info_matrix.append(["reference_value", self.reference_value, True,
1654
+ (bool, int, float, str, date)])
1655
+ arg_info_matrix.append(["fillna", self.fillna, True, FillNa])
1656
+ # "columns" and "out_columns" they can only accept a string, hence are being validated
1657
+ # here.
1658
+ arg_info_matrix.append(["columns", self.columns, False, str])
1659
+ arg_info_matrix.append(["out_columns", self.out_columns, True, str])
1660
+ # Other argument validations.
1661
+ _Validators._validate_function_arguments(arg_info_matrix)
1662
+
1663
+ # Call super()
1664
+ super().__init__(columns=columns, out_columns=out_columns, datatype=datatype)
1665
+ # Note:
1666
+ # Validations for "datatype" is done by super().
1667
+
1668
+ # "reference_value" should be provided when "style" is 'contrast'.
1669
+ if self.style.upper() == "CONTRAST" and self.reference_value is None:
1670
+ err_ = Messages.get_message(MessageCodes.DEPENDENT_ARG_MISSING,
1671
+ "reference_value",
1672
+ "style={}".format(self.style))
1673
+ raise TeradataMlException(err_, MessageCodes.DEPENDENT_ARG_MISSING)
1674
+
1675
+ if isinstance(self.reference_value, date):
1676
+ self.reference_value = UtilFuncs._convert_date_to_string(self.reference_value)
1677
+
1678
+ def _val_sql_syntax(self):
1679
+ """
1680
+ DESCRIPTION:
1681
+ Internal function to return a string representation of design code
1682
+ Transformation as required by SQL.
1683
+
1684
+ PARAMETERS:
1685
+ None.
1686
+
1687
+ RETURNS:
1688
+ String representing SQL syntax for 'designcode' SQL argument.
1689
+
1690
+ RAISES:
1691
+ None.
1692
+
1693
+ EXAMPLE:
1694
+ self._val_sql_syntax()
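+
+ # Illustration only (derived from the formatting logic below): for
+ # OneHotEncoder(values="yes", columns="masters") with the default
+ # "dummy" style and no "fillna", this returns a string beginning
+ # "{designstyle(dummycode), designvalues(yes), ..." where "..." is the
+ # columns/datatype fragment produced by self._val_transformation_fmt().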
1695
+ """
1696
+ # Generate syntax for "designstyle" and "designvalues" SQL arguments.
1697
+ design_style = "dummycode"
1698
+ if self.style.upper() == "CONTRAST":
1699
+ design_style = "contrastcode, {}".format(self.reference_value)
1700
+
1701
+ if isinstance(self.values, list):
1702
+ self.values = [
1703
+ UtilFuncs._convert_date_to_string(val) if isinstance(val, date) else val \
1704
+ for val in self.values]
1705
+ design_values = [str(val) if not isinstance(val, str) else val for val in self.values]
1706
+ design_values = ", ".join(design_values)
1707
+ elif isinstance(self.values, dict):
1708
+ values = []
1709
+ for val in self.values:
1710
+ if self.values[val] is not None:
1711
+ if isinstance(self.values[val], date):
1712
+ self.values[val] = UtilFuncs._convert_date_to_string(self.values[val])
1713
+ values.append("{}/{}".format(val, self.values[val]))
1714
+ else:
1715
+ values.append(str(val))
1716
+ design_values = ", ".join(values)
1717
+ elif isinstance(self.values, date):
1718
+ design_values = UtilFuncs._convert_date_to_string(self.values)
1719
+ else:
1720
+ design_values = self.values
1721
+
1722
+ ret_value = "designstyle({}), designvalues({})".format(design_style,
1723
+ design_values)
1724
+
1725
+ # Generate and add syntax for "columns" and "datatype" SQL arguments.
1726
+ columns_fmt = self._val_transformation_fmt()
1727
+ ret_value = "{}, {}".format(ret_value, columns_fmt)
1728
+
1729
+ # Generate and add syntax for "nullstyle", a SQL arguments.
1730
+ if self.fillna:
1731
+ ret_value = "{}, {}".format(ret_value, self.fillna._val_nullstyle_fmt())
1732
+
1733
+ # Return the SQL syntax for "designcode", a SQL argument.
1734
+ return "{" + ret_value + "}"
1735
+
1736
+
1737
+ class LabelEncoder(_Transformations):
1738
+ """
1739
+ Class to represent label encoding, i.e., variable recoding transformation technique.
1740
+ """
1741
+
1742
+ def __init__(self, values, columns, default=None, out_columns=None, datatype=None,
1743
+ fillna=None):
1744
+ """
1745
+ DESCRIPTION:
1746
+ Label encoding a categorical data column is done to re-express existing values
1747
+ of a column (variable) into a new coding scheme or to correct data quality
1748
+ problems and focus an analysis of a particular value. It allows for mapping
1749
+ individual values, NULL values, or any number of remaining values (ELSE
1750
+ option) to a new value, a NULL value or the same value.
1751
+ Label encoding supports character, numeric, and date type columns.
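+
+ A hypothetical pure-Python sketch (illustration only, not part of
+ teradataml) of the recoding rule described above:
+ >>> def recode(value, mapping, default=None):
+ ...     new = mapping.get(value, default)
+ ...     return value if new == "same" else new
+ >>> recode("Novice", {"Novice": 1, "Advanced": 2}, default=0)
+ 1
+ >>> recode("Beginner", {"Novice": 1, "Advanced": 2}, default=0)
+ 0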
1752
+
1753
+ Note:
1754
+ Output of this function is passed to "label_encode" argument of "Transform"
1755
+ function from Vantage Analytic Library.
1756
+
1757
+ PARAMETERS:
1758
+ values:
1759
+ Required Argument.
1760
+ Specifies the values to be label encoded. Values can be specified in
1761
+ two formats:
1762
+ 1. A list of two-tuples, where first value in the tuple is a
1763
+ old value and second value is a new value.
1764
+ For example,
1765
+ values = [(old_val1, new_val2), (old_val2, new_val2)]
1766
+ 2. A dictionary with key as old value and value as new value.
1767
+ For example,
1768
+ values = {old_val1: new_val2, old_val2: new_val2}
1769
+ Note:
1770
+ 1. If date values are entered as strings, the keyword 'DATE' must precede
1771
+ the date value, and the value must not be enclosed in single quotes;
1772
+ alternatively, pass a datetime.date object.
1773
+ For example,
1774
+ value='DATE 1987-06-09'
1775
+ value=date(1987, 6, 9)
1776
+ 2. To keep the old value as is, one can pass 'same' as its new value.
1777
+ 3. To use NULL values for old or new value, one can either use string
1778
+ 'null' or None.
1779
+ Types: two-tuple, list of two-tuples, dict
1780
+
1781
+ columns:
1782
+ Required Argument.
1783
+ Specifies the names of the columns containing values to be label encoded.
1784
+ Types: str or list of str
1785
+
1786
+ default:
1787
+ Optional Argument.
1788
+ Specifies the value assumed for all other cases.
1789
+ Permitted Values: None, 'SAME', 'NULL', a literal
1790
+ Default Value: None
1791
+ Types: bool, float, int, str
1792
+
1793
+ out_columns:
1794
+ Optional Argument.
1795
+ Specifies the names of the output columns. Value passed to this argument
1796
+ also plays a crucial role in determining the output column name.
1797
+ Note:
1798
+ Number of elements in "columns" and "out_columns" must be same.
1799
+ Types: str or list of str
1800
+
1801
+ datatype:
1802
+ Optional Argument.
1803
+ Specifies the name of the intended datatype of the output column.
1804
+ Intended data types for the output column can be specified using either the
1805
+ teradatasqlalchemy types or the permitted strings mentioned below:
1806
+ -------------------------------------------------------------------
1807
+ | If intended SQL Data Type is | Permitted Value to be passed is |
1808
+ |-------------------------------------------------------------------|
1809
+ | bigint | bigint |
1810
+ | byteint | byteint |
1811
+ | char(n) | char,n |
1812
+ | date | date |
1813
+ | decimal(m,n) | decimal,m,n |
1814
+ | float | float |
1815
+ | integer | integer |
1816
+ | number(*) | number |
1817
+ | number(n) | number,n |
1818
+ | number(*,n) | number,*,n |
1819
+ | number(n,n) | number,n,n |
1820
+ | smallint | smallint |
1821
+ | time(p) | time,p |
1822
+ | timestamp(p) | timestamp,p |
1823
+ | varchar(n) | varchar,n |
1824
+ --------------------------------------------------------------------
1825
+ Notes:
1826
+ 1. Argument is ignored if "columns" argument is not used.
1827
+ 2. char without a size is not supported.
1828
+ 3. number(*) does not include the * in its datatype format.
1829
+ Examples:
1830
+ 1. If intended datatype for the output column is "bigint", then
1831
+ pass string "bigint" to the argument as shown below:
1832
+ datatype="bigint"
1833
+ 2. If intended datatype for the output column is "decimal(3,5)", then
1834
+ pass string "decimal,3,5" to the argument as shown below:
1835
+ datatype="decimal,3,5"
1836
+ Types: str, BIGINT, BYTEINT, CHAR, DATE, DECIMAL, FLOAT, INTEGER, NUMBER, SMALLINT, TIME,
1837
+ TIMESTAMP, VARCHAR.
1838
+
1839
+ fillna:
1840
+ Optional Argument.
1841
+ Specifies whether the null replacement/missing value treatment should
1842
+ be performed with recoding or not. Output of FillNa() can be passed to
1843
+ this argument.
1844
+ Note:
1845
+ If the FillNa object is created with its arguments "columns",
1846
+ "out_columns" and "datatype", then values passed in FillNa() arguments
1847
+ are ignored. Only nullstyle information is captured from the same.
1848
+ Types: FillNa
1849
+
1850
+ RETURNS:
1851
+ An instance of LabelEncoder class.
1852
+
1853
+ RAISES:
1854
+ TeradataMlException, TypeError, ValueError
1855
+
1856
+ EXAMPLE:
1857
+ # Note:
1858
+ # To run any transformation, user needs to use Transform() function from
1859
+ # Vantage Analytic Library.
1860
+ # To do so import valib first and set the "val_install_location".
1861
+ >>> from teradataml import configure, DataFrame, LabelEncoder, FillNa, load_example_data, valib
1862
+ >>> configure.val_install_location = "SYSLIB"
1863
+ >>>
1864
+
1865
+ # Load example data.
1866
+ >>> load_example_data("dataframe", "admissions_train")
1867
+ >>>
1868
+
1869
+ # Create the required DataFrame.
1870
+ >>> admissions_train = DataFrame("admissions_train")
1871
+ >>> admissions_train
1872
+ masters gpa stats programming admitted
1873
+ id
1874
+ 13 no 4.00 Advanced Novice 1
1875
+ 26 yes 3.57 Advanced Advanced 1
1876
+ 5 no 3.44 Novice Novice 0
1877
+ 19 yes 1.98 Advanced Advanced 0
1878
+ 15 yes 4.00 Advanced Advanced 1
1879
+ 40 yes 3.95 Novice Beginner 0
1880
+ 7 yes 2.33 Novice Novice 1
1881
+ 22 yes 3.46 Novice Beginner 0
1882
+ 36 no 3.00 Advanced Novice 0
1883
+ 38 yes 2.65 Advanced Beginner 1
1884
+ >>>
1885
+
1886
+ # Example 1: Recode all values 'Novice', 'Advanced', and 'Beginner'
1887
+ # in "programming" and "stats" columns.
1888
+ # We will pass values to "label_encode" as a dictionary.
1889
+ >>> rc = LabelEncoder(values={"Novice": 1, "Advanced": 2, "Beginner": 3}, columns=["stats", "programming"])
1890
+
1891
+ # Execute Transform() function.
1892
+ >>> obj = valib.Transform(data=admissions_train, label_encode=rc)
1893
+ >>> obj.result
1894
+ id stats programming
1895
+ 0 22 1 3
1896
+ 1 36 2 1
1897
+ 2 15 2 2
1898
+ 3 38 2 3
1899
+ 4 5 1 1
1900
+ 5 17 2 2
1901
+ 6 34 2 3
1902
+ 7 13 2 1
1903
+ 8 26 2 2
1904
+ 9 19 2 2
1905
+ >>>
1906
+
1907
+ # Example 2: Recode value 'Novice' as 1, which is passed as a tuple to the "values"
1908
+ # argument, and label encode all other values as 0 by passing 0 to the "default"
1909
+ # argument, in "programming" and "stats" columns.
1910
+ >>> rc = LabelEncoder(values=("Novice", 1), columns=["stats", "programming"], default=0)
1911
+
1912
+ # Execute Transform() function.
1913
+ >>> obj = valib.Transform(data=admissions_train, label_encode=rc)
1914
+ >>> obj.result
1915
+ id stats programming
1916
+ 0 15 0 0
1917
+ 1 7 1 1
1918
+ 2 22 1 0
1919
+ 3 17 0 0
1920
+ 4 13 0 1
1921
+ 5 38 0 0
1922
+ 6 26 0 0
1923
+ 7 5 1 1
1924
+ 8 34 0 0
1925
+ 9 40 1 0
1926
+ >>>
1927
+
1928
+ # Example 3: In this example we encode values differently for multiple columns.
1929
+
1930
+ # For values in "programming" column, recoding will be done as follows:
1931
+ # Novice --> 0
1932
+ # Advanced --> 1 and
1933
+ # Rest of the values --> NULL
1934
+ >>> rc_prog = LabelEncoder(values=[("Novice", 0), ("Advanced", 1)], columns="programming",
1935
+ ... default=None)
1936
+ >>>
1937
+
1938
+ # For values in "stats" column, recoding will be done as follows:
1939
+ # Novice --> 0
1940
+ # Advanced --> keep it as is and
1941
+ # Beginner --> NULL
1942
+ >>> rc_stats = LabelEncoder(values={"Novice": 0, "Advanced": "same", "Beginner": None},
1943
+ ... columns="stats")
1944
+ >>>
1945
+
1946
+ # For values in "masters" column, recoding will be done as follows:
1947
+ # yes --> 1 and other as 0
1948
+ >>> rc_yes = LabelEncoder(values=("yes", 1), columns="masters", default=0,
1949
+ ... out_columns="masters_yes")
1950
+ >>>
1951
+
1952
+ # For values in "masters" column, label encoding will be done as follows:
1953
+ # no --> 1 and other as 0
1954
+ >>> rc_no = LabelEncoder(values=("no", 1), columns="masters", default=0,
1955
+ ... out_columns="masters_no")
1956
+ >>>
1957
+
1958
+ # Execute Transform() function.
1959
+ >>> obj = valib.Transform(data=admissions_train, label_encode=[rc_prog, rc_stats, rc_yes,
1960
+ ... rc_no])
1961
+ >>> obj.result
1962
+ id programming stats masters_yes masters_no
1963
+ 0 13 0 Advanced 0 1
1964
+ 1 26 1 Advanced 1 0
1965
+ 2 5 0 0 0 1
1966
+ 3 19 1 Advanced 1 0
1967
+ 4 15 1 Advanced 1 0
1968
+ 5 40 None 0 1 0
1969
+ 6 7 0 0 1 0
1970
+ 7 22 None 0 1 0
1971
+ 8 36 0 Advanced 0 1
1972
+ 9 38 None Advanced 1 0
1973
+ >>>
1974
+ """
1975
+ # Call super()
1976
+ super().__init__(columns=columns, out_columns=out_columns, datatype=datatype,
1977
+ columns_optional=False)
1978
+
1979
+ # Initialize arguments as data members.
1980
+ self.values = values
1981
+ self.default = default
1982
+ self.fillna = fillna
1983
+
1984
+ # Validations
1985
+ if isinstance(self.values, tuple):
1986
+ if len(self.values) != 2:
1987
+ raise ValueError("Number of values in a tuple can only be 2.")
1988
+ elif isinstance(self.values, list):
1989
+ for tup in self.values:
1990
+ if not isinstance(tup, tuple):
1991
+ err_ = Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE)
1992
+ raise TypeError(err_.format("values", ['tuple or dict or list of tuples']))
1993
+
1994
+ if len(tup) != 2:
1995
+ raise ValueError("Number of values in a tuple can only be 2.")
1996
+
1997
+ elif not isinstance(self.values, dict):
1998
+ err_ = Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE)
1999
+ raise TypeError(err_.format("values", ['tuple or dict or list of tuples']))
2000
+
2001
+ arg_info_matrix = []
2002
+ arg_info_matrix.append(["values", self.values, False, (tuple, list, dict)])
2003
+ arg_info_matrix.append(["default", self.default, True, (bool, int, float, str)])
2004
+ arg_info_matrix.append(["fillna", self.fillna, True, FillNa])
2005
+
2006
+ # Other argument validations.
2007
+ _Validators._validate_function_arguments(arg_info_matrix)
2008
+ # Note:
2009
+ # Validations for "columns", "out_column" and "datatype" is done by super().
2010
+
2011
+ def _val_sql_syntax(self):
2012
+ """
2013
+ DESCRIPTION:
2014
+ Internal function to return a string representation of LabelEncoder
2015
+ Transformation as required by SQL.
2016
+
2017
+ PARAMETERS:
2018
+ None.
2019
+
2020
+ RETURNS:
2021
+ String representing SQL syntax for 'recode' SQL argument.
2022
+
2023
+ RAISES:
2024
+ None.
2025
+
2026
+ EXAMPLE:
2027
+ self._val_sql_syntax()
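+
+ # Illustration only (derived from the formatting logic below): for
+ # LabelEncoder(values=("Novice", 1), columns="stats", default=0), this
+ # returns a string beginning "{recodevalues(Novice/1), recodeother(0), ..."
+ # where "..." is the columns/datatype fragment produced by
+ # self._val_transformation_fmt().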
2028
+ """
2029
+ # Generate syntax for "recodevalues".
2030
+ if isinstance(self.values, tuple):
2031
+ old_val = self._get_value_string_repr(self.values[0])
2032
+ new_val = self._get_value_string_repr(self.values[1])
2033
+ recode_values = "{}/{}".format(old_val, new_val)
2034
+ elif isinstance(self.values, list):
2035
+ recode_values = []
2036
+ for val in self.values:
2037
+ old_val = self._get_value_string_repr(val[0])
2038
+ new_val = self._get_value_string_repr(val[1])
2039
+ recode_values.append("{}/{}".format(old_val, new_val))
2040
+ recode_values = ", ".join(recode_values)
2041
+ else:
2042
+ recode_values = []
2043
+ for key in self.values:
2044
+ old_val = self._get_value_string_repr(key)
2045
+ new_val = self._get_value_string_repr(self.values[key])
2046
+ recode_values.append("{}/{}".format(old_val, new_val))
2047
+ recode_values = ", ".join(recode_values)
2048
+
2049
+ recode_other = "NULL" if self.default is None else self.default
2050
+
2051
+ ret_value = "recodevalues({}), recodeother({})".format(recode_values,
2052
+ recode_other)
2053
+
2054
+ # Generate and add syntax for "columns" and "datatype" SQL arguments.
2055
+ columns_fmt = self._val_transformation_fmt()
2056
+ ret_value = "{}, {}".format(ret_value, columns_fmt)
2057
+
2058
+ # Generate and add syntax for "nullstyle", a SQL arguments.
2059
+ if self.fillna:
2060
+ ret_value = "{}, {}".format(ret_value, self.fillna._val_nullstyle_fmt())
2061
+ # Return the SQL syntax for "recode", a SQL argument.
2062
+ return "{" + ret_value + "}"
2063
+
2064
+ def _get_value_string_repr(self, value):
2065
+ """
2066
+ DESCRIPTION:
2067
+ Internal function to return a string representation of given value if required.
2068
+
2069
+ PARAMETERS:
2070
+ value:
2071
+ Required Argument.
2072
+ Specifies the value to perform conversion on.
2073
+ Types: str, bool, float, None, datetime.date
2074
+
2075
+ RETURNS:
2076
+ String representation of passed argument.
2077
+
2078
+ RAISES:
2079
+ None.
2080
+
2081
+ Examples:
2082
+ self._get_value_string_repr(key)
2083
+
2084
+ """
2085
+ if isinstance(value, date):
2086
+ updated_val = UtilFuncs._convert_date_to_string(value)
2087
+ elif value is None:
2088
+ updated_val = "NULL"
2089
+ elif value == "":
2090
+ updated_val = "\"\""
2091
+ else:
2092
+ updated_val = value
2093
+ return updated_val
2094
+
2095
+
2096
+ class MinMaxScalar(_Transformations):
2097
+ """ Class to represent rescale transformation technique. """
2098
+
2099
+ def __init__(self, columns, lbound=0, ubound=1, out_columns=None, datatype=None,
2100
+ fillna=None):
2101
+ """
2102
+ DESCRIPTION:
2103
+ MinMaxScalar allows rescaling that limits the upper and lower boundaries of the
2104
+ data in a continuous numeric column using a linear rescaling function based on
2105
+ maximum and minimum data values. MinMaxScalar is useful with algorithms that require
2106
+ or work better with data within a certain range. MinMaxScalar is only valid on numeric
2107
+ columns, and not columns of type date.
2108
+
2109
+ The rescale transformation formulas are shown in the following examples.
2110
+ Here l denotes the lower bound and r denotes the upper bound.
2111
+ * When both the lower and upper bounds are specified:
2112
+ f(x,l,r) = l + ((x-min(x))(r-l))/(max(x)-min(x))
2113
+ * When only the lower bound is specified:
2114
+ f(x,l) = x-min(x)+l
2115
+ * When only the upper bound is specified:
2116
+ f(x,r) = x-max(x)+r
2117
+ Rescaling supports only numeric type columns.
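+
+ As a minimal worked sketch (illustration only), applying the first
+ formula above in plain Python to the "Feb" values from the examples
+ below, with l=-1 and r=1:
+ >>> feb = [210.0, 90.0, 90.0, 200.0, 200.0, 210.0]
+ >>> lo, hi, l, r = min(feb), max(feb), -1, 1
+ >>> [round(l + (x - lo) * (r - l) / (hi - lo), 6) for x in feb]
+ [1.0, -1.0, -1.0, 0.833333, 0.833333, 1.0]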
2118
+
2119
+ Note:
2120
+ Output of this function is passed to "rescale" argument of "Transform"
2121
+ function from Vantage Analytic Library.
2122
+
2123
+ PARAMETERS:
2124
+ columns:
2125
+ Required Argument.
2126
+ Specifies the names of the columns to perform transformation on.
2127
+ Types: str or list of str
2128
+
2129
+ lbound:
2130
+ Optional Argument.
2131
+ Specifies the lowerbound value required for rescaling the numeric data.
2132
+ If only the lower boundary is supplied, the variable is aligned to this
2133
+ value. This can be achieved by passing None to "ubound" argument.
2134
+ Default Value: 0
2135
+ Types: float, int
2136
+
2137
+ ubound:
2138
+ Optional Argument.
2139
+ Specifies the upperbound value required for rescaling the numeric data.
2140
+ If only an upper boundary value is specified, the variable is aligned to
2141
+ this value. This can be achieved by passing None to "lbound" argument.
2142
+ Default Value: 1
2143
+ Types: float, int
2144
+
2145
+ out_columns:
2146
+ Optional Argument.
2147
+ Specifies the names of the output columns.
2148
+ Note:
2149
+ Number of elements in "columns" and "out_columns" must be same.
2150
+ Types: str or list of str
2151
+
2152
+ datatype:
2153
+ Optional Argument.
2154
+ Specifies the name of the intended datatype of the output column.
2155
+ Intended data types for the output column can be specified using either the
2156
+ teradatasqlalchemy types or the permitted strings mentioned below:
2157
+ -------------------------------------------------------------------
2158
+ | If intended SQL Data Type is | Permitted Value to be passed is |
2159
+ |-------------------------------------------------------------------|
2160
+ | bigint | bigint |
2161
+ | byteint | byteint |
2162
+ | char(n) | char,n |
2163
+ | date | date |
2164
+ | decimal(m,n) | decimal,m,n |
2165
+ | float | float |
2166
+ | integer | integer |
2167
+ | number(*) | number |
2168
+ | number(n) | number,n |
2169
+ | number(*,n) | number,*,n |
2170
+ | number(n,n) | number,n,n |
2171
+ | smallint | smallint |
2172
+ | time(p) | time,p |
2173
+ | timestamp(p) | timestamp,p |
2174
+ | varchar(n) | varchar,n |
2175
+ --------------------------------------------------------------------
2176
+ Notes:
2177
+ 1. Argument is ignored if "columns" argument is not used.
2178
+ 2. char without a size is not supported.
2179
+ 3. number(*) does not include the * in its datatype format.
2180
+ Examples:
2181
+ 1. If intended datatype for the output column is "bigint", then
2182
+ pass string "bigint" to the argument as shown below:
2183
+ datatype="bigint"
2184
+ 2. If intended datatype for the output column is "decimal(3,5)", then
2185
+ pass string "decimal,3,5" to the argument as shown below:
2186
+ datatype="decimal,3,5"
2187
+ Types: str, BIGINT, BYTEINT, CHAR, DATE, DECIMAL, FLOAT, INTEGER, NUMBER, SMALLINT, TIME,
2188
+ TIMESTAMP, VARCHAR.
2189
+
2190
+ fillna:
2191
+ Optional Argument.
2192
+ Specifies whether the null replacement/missing value treatment should
2193
+ be performed with rescaling or not. Output of FillNa() can be passed to
2194
+ this argument.
2195
+ Note:
2196
+ If the FillNa object is created with its arguments "columns",
2197
+ "out_columns" and "datatype", then values passed in FillNa() arguments
2198
+ are ignored. Only nullstyle information is captured from the same.
2199
+ Types: FillNa
2200
+
2201
+ RETURNS:
2202
+ An instance of MinMaxScalar class.
2203
+
2204
+ RAISES:
2205
+ TeradataMlException, TypeError, ValueError
2206
+
2207
+ EXAMPLE:
2208
+ # Note:
2209
+ # To run any transformation, user needs to use Transform() function from
2210
+ # Vantage Analytic Library.
2211
+ # To do so import valib first and set the "val_install_location".
2212
+ >>> from teradataml import configure, DataFrame, MinMaxScalar, FillNa, load_example_data, valib
2213
+ >>> configure.val_install_location = "SYSLIB"
2214
+ >>>
2215
+
2216
+ # Load example data.
2217
+ >>> load_example_data("dataframe", "sales")
2218
+ >>>
2219
+
2220
+ # Create the required DataFrames.
2221
+ >>> df = DataFrame("sales")
2222
+ >>> df
2223
+ Feb Jan Mar Apr datetime
2224
+ accounts
2225
+ Alpha Co 210.0 200.0 215.0 250.0 04/01/2017
2226
+ Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
2227
+ Yellow Inc 90.0 NaN NaN NaN 04/01/2017
2228
+ Jones LLC 200.0 150.0 140.0 180.0 04/01/2017
2229
+ Red Inc 200.0 150.0 140.0 NaN 04/01/2017
2230
+ Orange Inc 210.0 NaN NaN 250.0 04/01/2017
2231
+ >>>
2232
+
2233
+ # Example 1: Rescale values in column "Feb", using the default bounds, which is
2234
+ # with lowerbound as 0 and upperbound as 1.
2235
+ >>> rs = MinMaxScalar(columns="Feb")
2236
+
2237
+ # Execute Transform() function.
2238
+ >>> obj = valib.Transform(data=df, rescale=rs)
2239
+ >>> obj.result
2240
+ accounts Feb
2241
+ 0 Blue Inc 0.000000
2242
+ 1 Alpha Co 1.000000
2243
+ 2 Jones LLC 0.916667
2244
+ 3 Yellow Inc 0.000000
2245
+ 4 Orange Inc 1.000000
2246
+ 5 Red Inc 0.916667
2247
+ >>>
2248
+
2249
+ # Example 2: Rescale values in column "Feb", using only lowerbound as -1.
2250
+ # To use only lowerbound, one must pass None to "ubound".
2251
+ >>> rs = MinMaxScalar(columns="Feb", lbound=-1, ubound=None)
2252
+
2253
+ # Execute Transform() function.
2254
+ >>> obj = valib.Transform(data=df, rescale=rs)
2255
+ >>> obj.result
2256
+ accounts Feb
2257
+ 0 Jones LLC 109.0
2258
+ 1 Yellow Inc -1.0
2259
+ 2 Red Inc 109.0
2260
+ 3 Blue Inc -1.0
2261
+ 4 Alpha Co 119.0
2262
+ 5 Orange Inc 119.0
2263
+ >>>
2264
+
2265
+ # Example 3: Rescale values in columns "Jan" and "Apr", using only upperbound as 10.
2266
+ # To use only upperbound, one must pass None to "lbound".
2267
+ # We shall also combine this with missing value treatment. We shall replace
2268
+ # missing values with "mode" null style replacement.
2269
+ >>> fn = FillNa(style="mode")
2270
+ >>> rs = MinMaxScalar(columns=["Jan", "Apr"], lbound=None, ubound=10, fillna=fn)
2271
+
2272
+ # Execute Transform() function.
2273
+ >>> obj = valib.Transform(data=df, rescale=rs, key_columns="accounts")
2274
+ >>> obj.result
2275
+ accounts Jan Apr
2276
+ 0 Alpha Co 10.0 10.0
2277
+ 1 Blue Inc -140.0 -139.0
2278
+ 2 Yellow Inc -40.0 10.0
2279
+ 3 Jones LLC -40.0 -60.0
2280
+ 4 Red Inc -40.0 10.0
2281
+ 5 Orange Inc -40.0 10.0
2282
+ >>>
2283
+
2284
+ # Example 4: This example shows combining multiple ways of rescaling in one
2285
+ # Transform() call.
2286
+
2287
+ # Rescale values in column "Feb" using lowerbound as -1 and upperbound as 1.
2288
+ # Name the output column as "Feb1".
2289
+ >>> rs_1 = MinMaxScalar(columns="Feb", lbound=-1, ubound=1, out_columns="Feb1")
2290
+ >>>
2291
+
2292
+ # Rescale values in column "Feb" using only upperbound as 1.
2293
+ # Name the output column as "FebU".
2294
+ >>> rs_2 = MinMaxScalar(columns="Feb", lbound=None, ubound=1, out_columns="FebU")
2295
+ >>>
2296
+
2297
+ # Rescale values in column "Feb" using only lowerbound as 0 (default value).
2298
+ # Name the output column as "FebL".
2299
+ >>> rs_3 = MinMaxScalar(columns="Feb", ubound=None, out_columns="FebL")
2300
+ >>>
2301
+
2302
+ # Rescale values in columns "Jan" and "Apr" using default bounds.
2303
+ # Name the output columns as "Jan1" and "Apr1".
2304
+ # Combine with Missing value treatment, with literal null replacement.
2305
+ >>> fn_1 = FillNa(style="literal", value=0)
2306
+ >>> rs_4 = MinMaxScalar(columns=["Jan", "Apr"], out_columns=["Jan1", "Apr1"], fillna=fn_1)
2307
+ >>>
2308
+
2309
+ # Rescale values in columns "Jan" and "Apr" using default bounds.
2310
+ # Name the output columns as "Jan2" and "Apr2".
2311
+ # Combine with Missing value treatment, with median null replacement.
2312
+ >>> fn_2 = FillNa(style="median")
2313
+ >>> rs_5 = MinMaxScalar(columns=["Jan", "Apr"], out_columns=["Jan2", "Apr2"], fillna=fn_2)
2314
+ >>>
2315
+
2316
+ # Execute Transform() function.
2317
+ >>> obj = valib.Transform(data=df, rescale=[rs_1, rs_2, rs_3, rs_4, rs_5],
2318
+ ... key_columns="accounts")
2319
+ >>> obj.result
2320
+ accounts Feb1 FebU FebL Jan1 Apr1 Jan2 Apr2
2321
+ 0 Blue Inc -1.000000 -119.0 0.0 0.25 0.404 0.000000 0.000000
2322
+ 1 Alpha Co 1.000000 1.0 120.0 1.00 1.000 1.000000 1.000000
2323
+ 2 Jones LLC 0.833333 -9.0 110.0 0.75 0.720 0.666667 0.530201
2324
+ 3 Yellow Inc -1.000000 -119.0 0.0 0.00 0.000 0.666667 0.765101
2325
+ 4 Orange Inc 1.000000 1.0 120.0 0.00 1.000 0.666667 1.000000
2326
+ 5 Red Inc 0.833333 -9.0 110.0 0.75 0.000 0.666667 0.765101
2327
+ >>>
2328
+ """
2329
+ # Call super()
2330
+ super().__init__(columns=columns, out_columns=out_columns, datatype=datatype,
2331
+ columns_optional=False)
2332
+
2333
+ # Initialize arguments as data members.
2334
+ self.lbound = lbound
2335
+ self.ubound = ubound
2336
+ self.fillna = fillna
2337
+
2338
+ # Validations
2339
+ arg_info_matrix = []
2340
+ arg_info_matrix.append(["lbound", self.lbound, True, (float, int)])
2341
+ arg_info_matrix.append(["ubound", self.ubound, True, (float, int)])
2342
+ arg_info_matrix.append(["fillna", self.fillna, True, FillNa])
2343
+ # Note:
2344
+ # Validations for "columns", "out_columns" and "datatype" is done by super().
2345
+ # Other argument validations.
2346
+ _Validators._validate_function_arguments(arg_info_matrix)
2347
+
2348
+ if self.lbound is None and self.ubound is None:
2349
+ raise TeradataMlException(
2350
+ Messages.get_message(MessageCodes.SPECIFY_AT_LEAST_ONE_ARG,
2351
+ "lbound", "ubound"),
2352
+ MessageCodes.SPECIFY_AT_LEAST_ONE_ARG)
2353
+
2354
+ def _val_sql_syntax(self):
2355
+ """
2356
+ DESCRIPTION:
2357
+ Internal function to return a string representation of rescale
2358
+ Transformation as required by SQL.
2359
+
2360
+ PARAMETERS:
2361
+ None.
2362
+
2363
+ RETURNS:
2364
+ String representing SQL syntax for 'rescale' SQL argument.
2365
+
2366
+ RAISES:
2367
+ None.
2368
+
2369
+ EXAMPLE:
2370
+ self._val_sql_syntax()
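+
+ # Illustration only (derived from the formatting logic below): for
+ # MinMaxScalar(columns="Feb") with the default lbound=0 and ubound=1,
+ # this returns a string beginning
+ # "{rescalebounds(lowerbound/0, upperbound/1), ..." where "..." is the
+ # columns/datatype fragment produced by self._val_transformation_fmt().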
2371
+ """
2372
+ # Generate syntax for "rescale" SQL argument.
2373
+ rescale_values = []
2374
+ if self.lbound is not None:
2375
+ rescale_values.append("lowerbound/{}".format(self.lbound))
2376
+
2377
+ if self.ubound is not None:
2378
+ rescale_values.append("upperbound/{}".format(self.ubound))
2379
+
2380
+ ret_value = "rescalebounds({})".format(", ".join(rescale_values))
2381
+
2382
+ # Generate and add syntax for "columns" and "datatype" SQL arguments.
2383
+ columns_fmt = self._val_transformation_fmt()
2384
+ ret_value = "{}, {}".format(ret_value, columns_fmt)
2385
+
2386
+ # Generate and add syntax for "nullstyle", a SQL arguments.
2387
+ if self.fillna:
2388
+ ret_value = "{}, {}".format(ret_value, self.fillna._val_nullstyle_fmt())
2389
+
2390
+ return "{" + ret_value + "}"
2391
+
2392
+
2393
+ class Retain(_Transformations):
2394
+ """
2395
+ Class to represent Retain transformation technique to retain or copy columns
2396
+ from input to output.
2397
+ """
2398
+
2399
+ def __init__(self, columns, out_columns=None, datatype=None):
2400
+ """
2401
+ DESCRIPTION:
2402
+ Retain option allows you to copy one or more columns into the final
2403
+ analytic data set. By default, the result column name is the same as
2404
+ the input column name, but this can be changed. If a specific type is
2405
+ specified, it results in casting the retained column.
2406
+ The Retain transformation is supported for all valid data types.
2407
+
2408
+ Note:
2409
+ Output of this function is passed to "retain" argument of "Transform"
2410
+ function from Vantage Analytic Library.
2411
+
2412
+ PARAMETERS:
2413
+ columns:
2414
+ Required Argument.
2415
+ Specifies the names of the columns to retain.
2416
+ Types: str or list of str
2417
+
2418
+ out_columns:
2419
+ Optional Argument.
2420
+ Specifies the names of the output columns.
2421
+ Note:
2422
+ Number of elements in "columns" and "out_columns" must be same.
2423
+ Types: str or list of str
2424
+
2425
+ datatype:
2426
+ Optional Argument.
2427
+ Specifies the name of the intended datatype of the output column.
2428
+ Intended data types for the output column can be specified using either the
2429
+ teradatasqlalchemy types or the permitted strings mentioned below:
2430
+ -------------------------------------------------------------------
2431
+ | If intended SQL Data Type is | Permitted Value to be passed is |
2432
+ |-------------------------------------------------------------------|
2433
+ | bigint | bigint |
2434
+ | byteint | byteint |
2435
+ | char(n) | char,n |
2436
+ | date | date |
2437
+ | decimal(m,n) | decimal,m,n |
2438
+ | float | float |
2439
+ | integer | integer |
2440
+ | number(*) | number |
2441
+ | number(n) | number,n |
2442
+ | number(*,n) | number,*,n |
2443
+ | number(n,n) | number,n,n |
2444
+ | smallint | smallint |
2445
+ | time(p) | time,p |
2446
+ | timestamp(p) | timestamp,p |
2447
+ | varchar(n) | varchar,n |
2448
+ --------------------------------------------------------------------
2449
+ Notes:
2450
+ 1. Argument is ignored if "columns" argument is not used.
2451
+ 2. char without a size is not supported.
2452
+ 3. number(*) does not include the * in its datatype format.
2453
+ Examples:
2454
+ 1. If intended datatype for the output column is "bigint", then
2455
+ pass string "bigint" to the argument as shown below:
2456
+ datatype="bigint"
2457
+ 2. If intended datatype for the output column is "decimal(3,5)", then
2458
+ pass string "decimal,3,5" to the argument as shown below:
2459
+ datatype="decimal,3,5"
2460
+ Types: str, BIGINT, BYTEINT, CHAR, DATE, DECIMAL, FLOAT, INTEGER, NUMBER, SMALLINT, TIME,
2461
+ TIMESTAMP, VARCHAR.
2462
+
2463
+ RETURNS:
2464
+ An instance of Retain class.
2465
+
2466
+ RAISES:
2467
+ TeradataMlException, TypeError, ValueError
2468
+
2469
+ EXAMPLE:
2470
+ # Note:
2471
+ # To run any transformation, user needs to use Transform() function from
2472
+ # Vantage Analytic Library.
2473
+ # To do so import valib first and set the "val_install_location".
2474
+ >>> from teradataml import configure, DataFrame, load_example_data, valib, Retain
2475
+ >>> configure.val_install_location = "SYSLIB"
2476
+ >>>
2477
+
2478
+ # Load example data.
2479
+ >>> load_example_data("dataframe", "sales")
2480
+ >>>
2481
+
2482
+ # Create the required DataFrames.
2483
+ >>> sales = DataFrame("sales")
2484
+ >>> sales
2485
+ Feb Jan Mar Apr datetime
2486
+ accounts
2487
+ Alpha Co 210.0 200.0 215.0 250.0 04/01/2017
2488
+ Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
2489
+ Yellow Inc 90.0 NaN NaN NaN 04/01/2017
2490
+ Jones LLC 200.0 150.0 140.0 180.0 04/01/2017
2491
+ Red Inc 200.0 150.0 140.0 NaN 04/01/2017
2492
+ Orange Inc 210.0 NaN NaN 250.0 04/01/2017
2493
+ >>>
2494
+
2495
+ # Example: Shows retaining some columns unchanged and some with a name or datatype
2496
+ # change.
2497
+
2498
+ # Retain columns "accounts" and "Feb" as is.
2499
+ >>> rt_1 = Retain(columns=["accounts", "Feb"])
2500
+ >>>
2501
+
2502
+ # Retain column "Jan" with name as "january".
2503
+ >>> rt_2 = Retain(columns="Jan", out_columns="january")
2504
+ >>>
2505
+
2506
+ # Retain column "Mar" and "Apr" with name as "march" and "april" with
2507
+ # datatype changed to 'bigint'.
2508
+ >>> rt_3 = Retain(columns=["Mar", "Apr"], out_columns=["march", "april"],
2509
+ ... datatype="bigint")
2510
+ >>>
2511
+
2512
+
2513
+ # Execute Transform() function.
2514
+ >>> obj = valib.Transform(data=sales, retain=[rt_1, rt_2, rt_3])
2515
+ >>> obj.result
2516
+ accounts accounts1 Feb january march april
2517
+ 0 Alpha Co Alpha Co 210.0 200.0 215.0 250.0
2518
+ 1 Blue Inc Blue Inc 90.0 50.0 95.0 101.0
2519
+ 2 Yellow Inc Yellow Inc 90.0 NaN NaN NaN
2520
+ 3 Jones LLC Jones LLC 200.0 150.0 140.0 180.0
2521
+ 4 Red Inc Red Inc 200.0 150.0 140.0 NaN
2522
+ 5 Orange Inc Orange Inc 210.0 NaN NaN 250.0
2523
+ >>>
+ """
+ # Call super()
+ super().__init__(columns=columns, out_columns=out_columns, datatype=datatype,
+ columns_optional=False)
+
+ def _val_sql_syntax(self):
+ """
+ DESCRIPTION:
+ Internal function to return a string representation of retain
+ Transformation as required by SQL.
+
+ PARAMETERS:
+ None.
+
+ RETURNS:
+ String representing SQL syntax for 'retain' SQL argument.
+
+ RAISES:
+ None.
+
+ EXAMPLE:
+ self._val_sql_syntax()
+ """
+ # Generate and return syntax for "columns" and "datatype" SQL arguments.
+ return "{" + self._val_transformation_fmt() + "}"
+
+
+ class Sigmoid(_Transformations):
+ """
+ Class to represent sigmoid transformation technique for rescaling of continuous
+ numeric data.
+ """
+
+ def __init__(self, columns, style="logit", out_columns=None, datatype=None,
+ fillna=None):
+ """
+ DESCRIPTION:
+ Sigmoid transformation allows rescaling of continuous numeric data in a more
+ sophisticated way than the Rescaling transformation function. In a Sigmoid
+ transformation, a numeric column is transformed using a type of sigmoid or
+ s-shaped function.
+
+ These non-linear transformations are more useful in data mining than a linear
+ Rescaling transformation. The Sigmoid transformation is supported for numeric
+ columns only.
+
+ For absolute values of x greater than or equal to 36, the value of the
+ sigmoid function is effectively 1 for positive arguments or 0 for negative
+ arguments, within about 15 digits of significance.
+
+ Note:
+ Output of this function is passed to "sigmoid" argument of "Transform"
+ function from Vantage Analytic Library.
+
+ PARAMETERS:
+ columns:
+ Required Argument.
+ Specifies the names of the columns to scale.
+ Types: str or list of str
+
+ style:
+ Optional Argument.
+ Specifies the style of sigmoid function to use.
+ Permitted Values:
+ * "logit":
+ The logit function produces a continuously increasing value
+ between 0 and 1.
+ * "modifiedlogit":
+ The modified logit function is twice the logit minus 1 and
+ produces a value between -1 and 1.
+ * "tanh":
+ The hyperbolic tangent function also produces a value between
+ -1 and 1.
+ Default Value: 'logit'
+ Types: str
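+ Illustration (added sketch, not part of this API): with "logit" taken
+ as the standard logistic function, consistent with the example
+ outputs below, the three styles compute, in plain Python:
+ >>> import math
+ >>> x = 5.0
+ >>> 1 / (1 + math.exp(-x))        # "logit": 0.993307...
+ >>> 2 / (1 + math.exp(-x)) - 1    # "modifiedlogit": 0.986614...
+ >>> math.tanh(x)                  # "tanh": 0.999909...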
+ out_columns:
+ Optional Argument.
+ Specifies the names of the output columns.
+ Note:
+ Number of elements in "columns" and "out_columns" must be the same.
+ Types: str or list of str
+
+ datatype:
+ Optional Argument.
+ Specifies the name of the intended datatype of the output column.
+ Intended data types for the output column can be specified using either the
+ teradatasqlalchemy types or the permitted strings mentioned below:
+ ------------------------------------------------------------------
+ | If intended SQL Data Type is | Permitted Value to be passed is |
+ |----------------------------------------------------------------|
+ | bigint                       | bigint                          |
+ | byteint                      | byteint                         |
+ | char(n)                      | char,n                          |
+ | date                         | date                            |
+ | decimal(m,n)                 | decimal,m,n                     |
+ | float                        | float                           |
+ | integer                      | integer                         |
+ | number(*)                    | number                          |
+ | number(n)                    | number,n                        |
+ | number(*,n)                  | number,*,n                      |
+ | number(n,n)                  | number,n,n                      |
+ | smallint                     | smallint                        |
+ | time(p)                      | time,p                          |
+ | timestamp(p)                 | timestamp,p                     |
+ | varchar(n)                   | varchar,n                       |
+ ------------------------------------------------------------------
+ Notes:
+ 1. Argument is ignored if "columns" argument is not used.
+ 2. char without a size is not supported.
+ 3. number(*) does not include the * in its datatype format.
+ Examples:
+ 1. If intended datatype for the output column is "bigint", then
+ pass string "bigint" to the argument as shown below:
+ datatype="bigint"
+ 2. If intended datatype for the output column is "decimal(3,5)", then
+ pass string "decimal,3,5" to the argument as shown below:
+ datatype="decimal,3,5"
+ Types: str, BIGINT, BYTEINT, CHAR, DATE, DECIMAL, FLOAT, INTEGER, NUMBER, SMALLINT, TIME,
+ TIMESTAMP, VARCHAR.
+
+ fillna:
+ Optional Argument.
+ Specifies whether null replacement/missing value treatment should be
+ performed along with the sigmoid transformation. The output of FillNa()
+ can be passed to this argument.
+ Note:
+ If the FillNa object is created with its "columns", "out_columns" and
+ "datatype" arguments, the values passed to those arguments are
+ ignored; only the nullstyle information is used.
+ Types: FillNa
+
+ RETURNS:
+ An instance of Sigmoid class.
+
+ RAISES:
+ TeradataMlException, TypeError, ValueError
+
+ EXAMPLE:
+ # Note:
+ # To run any transformation, user needs to use Transform() function from
+ # Vantage Analytic Library.
+ # To do so, import valib first and set the "val_install_location".
+ >>> from teradataml import configure, DataFrame, FillNa, Sigmoid, load_example_data, valib
+ >>> configure.val_install_location = "SYSLIB"
+ >>>
+
+ # Load example data.
+ >>> load_example_data("dataframe", "sales")
+ >>>
+
+ # Create the required teradataml DataFrame.
+ >>> sales = DataFrame("sales")
+ >>> sales
+                Feb    Jan    Mar    Apr    datetime
+ accounts
+ Blue Inc      90.0   50.0   95.0  101.0  04/01/2017
+ Orange Inc   210.0    NaN    NaN  250.0  04/01/2017
+ Red Inc      200.0  150.0  140.0    NaN  04/01/2017
+ Yellow Inc    90.0    NaN    NaN    NaN  04/01/2017
+ Jones LLC    200.0  150.0  140.0  180.0  04/01/2017
+ Alpha Co     210.0  200.0  215.0  250.0  04/01/2017
+ >>>
+
+ # Example 1: Scale values in columns "Jan" and "Mar" using sigmoid function "tanh".
+ # Combine the scaling with null replacement.
+ >>> fn = FillNa(style="literal", value=0)
+ >>> sig = Sigmoid(style="tanh", columns=["Jan", "Mar"], fillna=fn)
+
+ # Execute Transform() function.
+ >>> obj = valib.Transform(data=sales, sigmoid=sig, key_columns="accounts")
+ >>> obj.result
+      accounts  Jan  Mar
+ 0    Alpha Co  1.0  1.0
+ 1     Red Inc  1.0  1.0
+ 2  Orange Inc  0.0  0.0
+ 3   Jones LLC  1.0  1.0
+ 4  Yellow Inc  0.0  0.0
+ 5    Blue Inc  1.0  1.0
+ >>>
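+ # Note: the non-null "Jan" and "Mar" values (50 to 215) are far beyond
+ # tanh's saturation point, so they all map to 1.0, while the NULLs were
+ # first replaced with 0 by FillNa and tanh(0) is 0.0.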
+
+
+ # Example 2: Rescaling with Sigmoid is carried out with multiple styles.
+ >>> load_example_data("dataframe", "iris_test")
+ >>> df = DataFrame("iris_test")
+ >>> df
+      sepal_length  sepal_width  petal_length  petal_width  species
+ id
+ 5             5.0          3.6           1.4          0.2        1
+ 60            5.2          2.7           3.9          1.4        2
+ 15            5.8          4.0           1.2          0.2        1
+ 30            4.7          3.2           1.6          0.2        1
+ 40            5.1          3.4           1.5          0.2        1
+ 80            5.7          2.6           3.5          1.0        2
+ 120           6.0          2.2           5.0          1.5        3
+ 70            5.6          2.5           3.9          1.1        2
+ 20            5.1          3.8           1.5          0.3        1
+ 65            5.6          2.9           3.6          1.3        2
+ >>>
+
+ # Rescale values in columns "sepal_length", "sepal_width", "petal_length"
+ # and "petal_width" with 'logit' (default) sigmoid function.
+ >>> sig_1 = Sigmoid(columns=["sepal_length", "sepal_width", "petal_length",
+ ...                          "petal_width"],
+ ...                 out_columns=["sl", "sw", "pl", "pw"])
+ >>>
+
+ # Rescale values in columns "sepal_length", "sepal_width", "petal_length"
+ # and "petal_width" with 'tanh' sigmoid function.
+ >>> sig_2 = Sigmoid(style="tanh",
+ ...                 columns=["sepal_length", "sepal_width", "petal_length",
+ ...                          "petal_width"],
+ ...                 out_columns=["sl_t", "sw_t", "pl_t", "pw_t"])
+ >>>
+
+ # Rescale values in columns "sepal_length" and "sepal_width" with 'modifiedlogit'
+ # sigmoid function.
+ # Combine it with null replacement using 'median' style.
+ >>> fn = FillNa(style="median")
+ >>> sig_3 = Sigmoid(style="modifiedlogit", columns=["sepal_length", "sepal_width"],
+ ...                 out_columns=["sl_ml", "sw_ml"], fillna=fn)
+ >>>
+
+ # Execute Transform() function.
+ >>> obj = valib.Transform(data=df, sigmoid=[sig_1, sig_2, sig_3],
+ ...                       key_columns="id")
+ >>> obj.result
+     id        sl        sw        pl        pw      sl_t      sw_t      pl_t      pw_t     sl_ml     sw_ml
+ 0    5  0.993307  0.973403  0.802184  0.549834  0.999909  0.998508  0.885352  0.197375  0.986614  0.946806
+ 1   60  0.994514  0.937027  0.980160  0.802184  0.999939  0.991007  0.999181  0.885352  0.989027  0.874053
+ 2   15  0.996982  0.982014  0.768525  0.549834  0.999982  0.999329  0.833655  0.197375  0.993963  0.964028
+ 3   30  0.990987  0.960834  0.832018  0.549834  0.999835  0.996682  0.921669  0.197375  0.981973  0.921669
+ 4   40  0.993940  0.967705  0.817574  0.549834  0.999926  0.997775  0.905148  0.197375  0.987880  0.935409
+ 5   80  0.996665  0.930862  0.970688  0.731059  0.999978  0.989027  0.998178  0.761594  0.993330  0.861723
+ 6  120  0.997527  0.900250  0.993307  0.817574  0.999988  0.975743  0.999909  0.905148  0.995055  0.800499
+ 7   70  0.996316  0.924142  0.980160  0.750260  0.999973  0.986614  0.999181  0.800499  0.992632  0.848284
+ 8   20  0.993940  0.978119  0.817574  0.574443  0.999926  0.999000  0.905148  0.291313  0.987880  0.956237
+ 9   65  0.996316  0.947846  0.973403  0.785835  0.999973  0.993963  0.998508  0.861723  0.992632  0.895693
+ >>>
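+ # Spot check for id 5 (sepal_length 5.0): sl = 1/(1 + e**-5) = 0.993307,
+ # sl_t = tanh(5.0) = 0.999909, and sl_ml = 2*0.993307 - 1 = 0.986614,
+ # matching the first row above.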
+ """
+ # Call super()
+ super().__init__(columns=columns, out_columns=out_columns, datatype=datatype,
+ columns_optional=False)
+ # Initialize style and fillna as data members.
+ self.style = style
+ self.fillna = fillna
+
+ # Validations
+ arg_info_matrix = []
+ permitted_styles = ["LOGIT", "MODIFIEDLOGIT", "TANH"]
+ arg_info_matrix.append(["style", self.style, True, str, True, permitted_styles])
+ arg_info_matrix.append(["fillna", self.fillna, True, FillNa])
+ # Note:
+ # Validations for "columns", "out_columns" and "datatype" are done by super().
+ # Other argument validations.
+ _Validators._validate_function_arguments(arg_info_matrix)
+
+ def _val_sql_syntax(self):
+ """
+ DESCRIPTION:
+ Internal function to return a string representation of sigmoid
+ Transformation as required by SQL.
+
+ PARAMETERS:
+ None.
+
+ RETURNS:
+ String representing SQL syntax for 'sigmoidstyle' SQL argument.
+
+ RAISES:
+ None.
+
+ EXAMPLE:
+ self._val_sql_syntax()
+ """
+ # Generate and add syntax for "sigmoidstyle" SQL argument.
+ ret_value = "sigmoidstyle({})".format(self.style.lower())
+
+ # Generate and add syntax for "columns" and "datatype" SQL arguments.
+ columns_fmt = self._val_transformation_fmt()
+ ret_value = "{}, {}".format(ret_value, columns_fmt)
+
+ if self.fillna:
+ ret_value = "{}, {}".format(ret_value, self.fillna._val_nullstyle_fmt())
+
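+ # The assembled string has the shape
+ # "{sigmoidstyle(<style>), <columns/datatype syntax>[, <nullstyle syntax>]}".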
+ return "{" + ret_value + "}"
+
+
+ class ZScore(_Transformations):
+ """ Class to represent Z-Score transformation technique for rescaling. """
+
+ def __init__(self, columns, out_columns=None, datatype=None, fillna=None):
+ """
+ DESCRIPTION:
+ ZScore allows rescaling of continuous numeric data in a more
+ sophisticated way than a Rescaling transformation. In a Z-Score
+ transformation, a numeric column is transformed into its Z-score based
+ on the mean value and standard deviation of the data in the column.
+ Z-Score transforms each column value into the number of standard
+ deviations from the mean value of the column. This transformation is
+ more useful in data mining than a linear Rescaling transformation.
+ The Z-Score transformation supports both numeric and date type input data.
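+
+ Illustration (added sketch, not part of this API): each value is mapped
+ to z = (x - mean) / stddev. A minimal sketch in plain Python, assuming
+ the population standard deviation, which matches Example 1 below:
+ >>> vals = [210.0, 90.0, 90.0, 200.0, 200.0, 210.0]   # the "Feb" column
+ >>> mean = sum(vals) / len(vals)
+ >>> std = (sum((v - mean) ** 2 for v in vals) / len(vals)) ** 0.5
+ >>> (90.0 - mean) / std   # about -1.41, the "Blue Inc" row in Example 1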
+
+ Note:
+ Output of this function is passed to "zscore" argument of "Transform"
+ function from Vantage Analytic Library.
+
+ PARAMETERS:
+ columns:
+ Required Argument.
+ Specifies the name(s) of the column(s) to perform transformation on.
+ Types: str or list of str
+
+ out_columns:
+ Optional Argument.
+ Specifies the names of the output columns.
+ Note:
+ Number of elements in "columns" and "out_columns" must be the same.
+ Types: str or list of str
+
+ datatype:
+ Optional Argument.
+ Specifies the name of the intended datatype of the output column.
+ Intended data types for the output column can be specified using either the
+ teradatasqlalchemy types or the permitted strings mentioned below:
+ ------------------------------------------------------------------
+ | If intended SQL Data Type is | Permitted Value to be passed is |
+ |----------------------------------------------------------------|
+ | bigint                       | bigint                          |
+ | byteint                      | byteint                         |
+ | char(n)                      | char,n                          |
+ | date                         | date                            |
+ | decimal(m,n)                 | decimal,m,n                     |
+ | float                        | float                           |
+ | integer                      | integer                         |
+ | number(*)                    | number                          |
+ | number(n)                    | number,n                        |
+ | number(*,n)                  | number,*,n                      |
+ | number(n,n)                  | number,n,n                      |
+ | smallint                     | smallint                        |
+ | time(p)                      | time,p                          |
+ | timestamp(p)                 | timestamp,p                     |
+ | varchar(n)                   | varchar,n                       |
+ ------------------------------------------------------------------
+ Notes:
+ 1. Argument is ignored if "columns" argument is not used.
+ 2. char without a size is not supported.
+ 3. number(*) does not include the * in its datatype format.
+ Examples:
+ 1. If intended datatype for the output column is "bigint", then
+ pass string "bigint" to the argument as shown below:
+ datatype="bigint"
+ 2. If intended datatype for the output column is "decimal(3,5)", then
+ pass string "decimal,3,5" to the argument as shown below:
+ datatype="decimal,3,5"
+ Types: str, BIGINT, BYTEINT, CHAR, DATE, DECIMAL, FLOAT, INTEGER, NUMBER, SMALLINT, TIME,
+ TIMESTAMP, VARCHAR.
+
+ fillna:
+ Optional Argument.
+ Specifies whether null replacement/missing value treatment should be
+ performed along with the Z-Score transformation. The output of FillNa()
+ can be passed to this argument.
+ Note:
+ If the FillNa object is created with its "columns", "out_columns" and
+ "datatype" arguments, the values passed to those arguments are
+ ignored; only the nullstyle information is used.
+ Types: FillNa
+
+ RETURNS:
+ An instance of ZScore class.
+
+ RAISES:
+ TeradataMlException, TypeError, ValueError
+
+ EXAMPLE:
+ # Note:
+ # To run any transformation, user needs to use Transform() function from
+ # Vantage Analytic Library.
+ # To do so, import valib first and set the "val_install_location".
+ >>> from teradataml import configure, DataFrame, FillNa, load_example_data, valib, ZScore
+ >>> configure.val_install_location = "SYSLIB"
+ >>>
+
+ # Load example data.
+ >>> load_example_data("dataframe", "sales")
+ >>>
+
+ # Create the required DataFrame.
+ >>> sales = DataFrame("sales")
+ >>> sales
+                Feb    Jan    Mar    Apr    datetime
+ accounts
+ Alpha Co     210.0  200.0  215.0  250.0  04/01/2017
+ Blue Inc      90.0   50.0   95.0  101.0  04/01/2017
+ Yellow Inc    90.0    NaN    NaN    NaN  04/01/2017
+ Jones LLC    200.0  150.0  140.0  180.0  04/01/2017
+ Red Inc      200.0  150.0  140.0    NaN  04/01/2017
+ Orange Inc   210.0    NaN    NaN  250.0  04/01/2017
+ >>>
+
+ # Example 1: Rescaling with ZScore is carried out on the "Feb" column.
+ >>> zs = ZScore(columns="Feb")
+
+ # Execute Transform() function.
+ >>> obj = valib.Transform(data=sales, zscore=zs)
+ >>> obj.result
+      accounts       Feb
+ 0    Blue Inc -1.410220
+ 1    Alpha Co  0.797081
+ 2   Jones LLC  0.613139
+ 3  Yellow Inc -1.410220
+ 4  Orange Inc  0.797081
+ 5     Red Inc  0.613139
+ >>>
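+ # Spot check: mean("Feb") = 1000/6 = 166.67 and the population standard
+ # deviation is about 54.37, so (90 - 166.67)/54.37 = -1.41 and
+ # (210 - 166.67)/54.37 = 0.80, matching the rows above.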
+
+
+ # Example 2: Rescaling with ZScore is carried out on multiple columns "Jan"
+ # and "Apr", with null replacement using the "mode" style.
+ >>> fn = FillNa(style="mode")
+ >>> zs = ZScore(columns=["Jan", "Apr"], out_columns=["january", "april"], fillna=fn)
+
+ # Execute Transform() function.
+ >>> obj = valib.Transform(data=sales, zscore=zs, key_columns="accounts")
+ >>> obj.result
+      accounts   january     april
+ 0    Blue Inc -2.042649 -1.993546
+ 1    Alpha Co  1.299867  0.646795
+ 2   Jones LLC  0.185695 -0.593634
+ 3  Yellow Inc  0.185695  0.646795
+ 4  Orange Inc  0.185695  0.646795
+ 5     Red Inc  0.185695  0.646795
+ >>>
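+ # Note: the NULLs in "Jan" (Yellow Inc, Orange Inc) and "Apr" (Yellow Inc,
+ # Red Inc) were first replaced with the column mode (150.0 and 250.0,
+ # respectively) and then z-scored, which is why those rows share the score
+ # of the rows that already held the mode value.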
+ """
+ # Call super()
+ super().__init__(columns=columns, out_columns=out_columns, datatype=datatype,
+ columns_optional=False)
+ self.fillna = fillna
+
+ # Validations
+ arg_info_matrix = []
+ arg_info_matrix.append(["fillna", self.fillna, True, FillNa])
+ # Note:
+ # Validations for "columns", "out_columns" and "datatype" are done by super().
+ # Other argument validations.
+ _Validators._validate_function_arguments(arg_info_matrix)
+
+ def _val_sql_syntax(self):
+ """
+ DESCRIPTION:
+ Internal function to return a string representation of zscore
+ Transformation as required by SQL.
+
+ PARAMETERS:
+ None.
+
+ RETURNS:
+ String representing SQL syntax for 'zscore' SQL argument.
+
+ RAISES:
+ None.
+
+ EXAMPLE:
+ self._val_sql_syntax()
+ """
+ # Generate and add syntax for "columns" and "datatype" SQL arguments.
+ ret_value = self._val_transformation_fmt()
+
+ # Generate and add syntax for the "nullstyle" SQL argument.
+ if self.fillna:
+ ret_value = "{}, {}".format(ret_value, self.fillna._val_nullstyle_fmt())
+
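+ # The assembled string has the shape
+ # "{<columns/datatype syntax>[, <nullstyle syntax>]}".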
+ return "{" + ret_value + "}"