teradataml 17.20.0.7__py3-none-any.whl → 20.0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of teradataml has been flagged as potentially problematic by the registry.

Files changed (1303)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +1935 -1640
  4. teradataml/__init__.py +70 -60
  5. teradataml/_version.py +11 -11
  6. teradataml/analytics/Transformations.py +2995 -2995
  7. teradataml/analytics/__init__.py +81 -83
  8. teradataml/analytics/analytic_function_executor.py +2040 -2010
  9. teradataml/analytics/analytic_query_generator.py +958 -958
  10. teradataml/analytics/byom/H2OPredict.py +514 -514
  11. teradataml/analytics/byom/PMMLPredict.py +437 -437
  12. teradataml/analytics/byom/__init__.py +14 -14
  13. teradataml/analytics/json_parser/__init__.py +130 -130
  14. teradataml/analytics/json_parser/analytic_functions_argument.py +1707 -1707
  15. teradataml/analytics/json_parser/json_store.py +191 -191
  16. teradataml/analytics/json_parser/metadata.py +1637 -1637
  17. teradataml/analytics/json_parser/utils.py +798 -803
  18. teradataml/analytics/meta_class.py +196 -196
  19. teradataml/analytics/sqle/DecisionTreePredict.py +455 -470
  20. teradataml/analytics/sqle/NaiveBayesPredict.py +419 -428
  21. teradataml/analytics/sqle/__init__.py +97 -110
  22. teradataml/analytics/sqle/json/decisiontreepredict_sqle.json +78 -78
  23. teradataml/analytics/sqle/json/naivebayespredict_sqle.json +62 -62
  24. teradataml/analytics/table_operator/__init__.py +10 -10
  25. teradataml/analytics/uaf/__init__.py +63 -63
  26. teradataml/analytics/utils.py +693 -692
  27. teradataml/analytics/valib.py +1603 -1600
  28. teradataml/automl/__init__.py +1683 -0
  29. teradataml/automl/custom_json_utils.py +1270 -0
  30. teradataml/automl/data_preparation.py +1011 -0
  31. teradataml/automl/data_transformation.py +789 -0
  32. teradataml/automl/feature_engineering.py +1580 -0
  33. teradataml/automl/feature_exploration.py +554 -0
  34. teradataml/automl/model_evaluation.py +151 -0
  35. teradataml/automl/model_training.py +1026 -0
  36. teradataml/catalog/__init__.py +1 -3
  37. teradataml/catalog/byom.py +1759 -1716
  38. teradataml/catalog/function_argument_mapper.py +859 -861
  39. teradataml/catalog/model_cataloging_utils.py +491 -1510
  40. teradataml/clients/auth_client.py +133 -0
  41. teradataml/clients/pkce_client.py +481 -481
  42. teradataml/common/aed_utils.py +7 -2
  43. teradataml/common/bulk_exposed_utils.py +111 -111
  44. teradataml/common/constants.py +1438 -1441
  45. teradataml/common/deprecations.py +160 -0
  46. teradataml/common/exceptions.py +73 -73
  47. teradataml/common/formula.py +742 -742
  48. teradataml/common/garbagecollector.py +597 -635
  49. teradataml/common/messagecodes.py +424 -431
  50. teradataml/common/messages.py +228 -231
  51. teradataml/common/sqlbundle.py +693 -693
  52. teradataml/common/td_coltype_code_to_tdtype.py +48 -48
  53. teradataml/common/utils.py +2424 -2500
  54. teradataml/common/warnings.py +25 -25
  55. teradataml/common/wrapper_utils.py +1 -110
  56. teradataml/config/dummy_file1.cfg +4 -4
  57. teradataml/config/dummy_file2.cfg +2 -2
  58. teradataml/config/sqlengine_alias_definitions_v1.0 +13 -13
  59. teradataml/config/sqlengine_alias_definitions_v1.1 +19 -19
  60. teradataml/config/sqlengine_alias_definitions_v1.3 +18 -18
  61. teradataml/context/aed_context.py +217 -217
  62. teradataml/context/context.py +1091 -999
  63. teradataml/data/A_loan.csv +19 -19
  64. teradataml/data/BINARY_REALS_LEFT.csv +11 -11
  65. teradataml/data/BINARY_REALS_RIGHT.csv +11 -11
  66. teradataml/data/B_loan.csv +49 -49
  67. teradataml/data/BuoyData2.csv +17 -17
  68. teradataml/data/CONVOLVE2_COMPLEX_LEFT.csv +5 -5
  69. teradataml/data/CONVOLVE2_COMPLEX_RIGHT.csv +5 -5
  70. teradataml/data/Convolve2RealsLeft.csv +5 -5
  71. teradataml/data/Convolve2RealsRight.csv +5 -5
  72. teradataml/data/Convolve2ValidLeft.csv +11 -11
  73. teradataml/data/Convolve2ValidRight.csv +11 -11
  74. teradataml/data/DFFTConv_Real_8_8.csv +65 -65
  75. teradataml/data/Orders1_12mf.csv +24 -24
  76. teradataml/data/Pi_loan.csv +7 -7
  77. teradataml/data/SMOOTHED_DATA.csv +7 -7
  78. teradataml/data/TestDFFT8.csv +9 -9
  79. teradataml/data/TestRiver.csv +109 -109
  80. teradataml/data/Traindata.csv +28 -28
  81. teradataml/data/acf.csv +17 -17
  82. teradataml/data/adaboost_example.json +34 -34
  83. teradataml/data/adaboostpredict_example.json +24 -24
  84. teradataml/data/additional_table.csv +10 -10
  85. teradataml/data/admissions_test.csv +21 -21
  86. teradataml/data/admissions_train.csv +41 -41
  87. teradataml/data/admissions_train_nulls.csv +41 -41
  88. teradataml/data/advertising.csv +201 -0
  89. teradataml/data/ageandheight.csv +13 -13
  90. teradataml/data/ageandpressure.csv +31 -31
  91. teradataml/data/antiselect_example.json +36 -36
  92. teradataml/data/antiselect_input.csv +8 -8
  93. teradataml/data/antiselect_input_mixed_case.csv +8 -8
  94. teradataml/data/applicant_external.csv +6 -6
  95. teradataml/data/applicant_reference.csv +6 -6
  96. teradataml/data/arima_example.json +9 -9
  97. teradataml/data/assortedtext_input.csv +8 -8
  98. teradataml/data/attribution_example.json +33 -33
  99. teradataml/data/attribution_sample_table.csv +27 -27
  100. teradataml/data/attribution_sample_table1.csv +6 -6
  101. teradataml/data/attribution_sample_table2.csv +11 -11
  102. teradataml/data/bank_churn.csv +10001 -0
  103. teradataml/data/bank_marketing.csv +11163 -0
  104. teradataml/data/bank_web_clicks1.csv +42 -42
  105. teradataml/data/bank_web_clicks2.csv +91 -91
  106. teradataml/data/bank_web_url.csv +85 -85
  107. teradataml/data/barrier.csv +2 -2
  108. teradataml/data/barrier_new.csv +3 -3
  109. teradataml/data/betweenness_example.json +13 -13
  110. teradataml/data/bike_sharing.csv +732 -0
  111. teradataml/data/bin_breaks.csv +8 -8
  112. teradataml/data/bin_fit_ip.csv +3 -3
  113. teradataml/data/binary_complex_left.csv +11 -11
  114. teradataml/data/binary_complex_right.csv +11 -11
  115. teradataml/data/binary_matrix_complex_left.csv +21 -21
  116. teradataml/data/binary_matrix_complex_right.csv +21 -21
  117. teradataml/data/binary_matrix_real_left.csv +21 -21
  118. teradataml/data/binary_matrix_real_right.csv +21 -21
  119. teradataml/data/blood2ageandweight.csv +26 -26
  120. teradataml/data/bmi.csv +501 -0
  121. teradataml/data/boston.csv +507 -507
  122. teradataml/data/boston2cols.csv +721 -0
  123. teradataml/data/breast_cancer.csv +570 -0
  124. teradataml/data/buoydata_mix.csv +11 -11
  125. teradataml/data/burst_data.csv +5 -5
  126. teradataml/data/burst_example.json +20 -20
  127. teradataml/data/byom_example.json +17 -17
  128. teradataml/data/bytes_table.csv +3 -3
  129. teradataml/data/cal_housing_ex_raw.csv +70 -70
  130. teradataml/data/callers.csv +7 -7
  131. teradataml/data/calls.csv +10 -10
  132. teradataml/data/cars_hist.csv +33 -33
  133. teradataml/data/cat_table.csv +24 -24
  134. teradataml/data/ccm_example.json +31 -31
  135. teradataml/data/ccm_input.csv +91 -91
  136. teradataml/data/ccm_input2.csv +13 -13
  137. teradataml/data/ccmexample.csv +101 -101
  138. teradataml/data/ccmprepare_example.json +8 -8
  139. teradataml/data/ccmprepare_input.csv +91 -91
  140. teradataml/data/cfilter_example.json +12 -12
  141. teradataml/data/changepointdetection_example.json +18 -18
  142. teradataml/data/changepointdetectionrt_example.json +8 -8
  143. teradataml/data/chi_sq.csv +2 -2
  144. teradataml/data/churn_data.csv +14 -14
  145. teradataml/data/churn_emission.csv +35 -35
  146. teradataml/data/churn_initial.csv +3 -3
  147. teradataml/data/churn_state_transition.csv +5 -5
  148. teradataml/data/citedges_2.csv +745 -745
  149. teradataml/data/citvertices_2.csv +1210 -1210
  150. teradataml/data/clicks2.csv +16 -16
  151. teradataml/data/clickstream.csv +12 -12
  152. teradataml/data/clickstream1.csv +11 -11
  153. teradataml/data/closeness_example.json +15 -15
  154. teradataml/data/complaints.csv +21 -21
  155. teradataml/data/complaints_mini.csv +3 -3
  156. teradataml/data/complaints_testtoken.csv +224 -224
  157. teradataml/data/complaints_tokens_test.csv +353 -353
  158. teradataml/data/complaints_traintoken.csv +472 -472
  159. teradataml/data/computers_category.csv +1001 -1001
  160. teradataml/data/computers_test1.csv +1252 -1252
  161. teradataml/data/computers_train1.csv +5009 -5009
  162. teradataml/data/computers_train1_clustered.csv +5009 -5009
  163. teradataml/data/confusionmatrix_example.json +9 -9
  164. teradataml/data/conversion_event_table.csv +3 -3
  165. teradataml/data/corr_input.csv +17 -17
  166. teradataml/data/correlation_example.json +11 -11
  167. teradataml/data/coxhazardratio_example.json +39 -39
  168. teradataml/data/coxph_example.json +15 -15
  169. teradataml/data/coxsurvival_example.json +28 -28
  170. teradataml/data/cpt.csv +41 -41
  171. teradataml/data/credit_ex_merged.csv +45 -45
  172. teradataml/data/customer_loyalty.csv +301 -301
  173. teradataml/data/customer_loyalty_newseq.csv +31 -31
  174. teradataml/data/customer_segmentation_test.csv +2628 -0
  175. teradataml/data/customer_segmentation_train.csv +8069 -0
  176. teradataml/data/dataframe_example.json +146 -146
  177. teradataml/data/decisionforest_example.json +37 -37
  178. teradataml/data/decisionforestpredict_example.json +38 -38
  179. teradataml/data/decisiontree_example.json +21 -21
  180. teradataml/data/decisiontreepredict_example.json +45 -45
  181. teradataml/data/dfft2_size4_real.csv +17 -17
  182. teradataml/data/dfft2_test_matrix16.csv +17 -17
  183. teradataml/data/dfft2conv_real_4_4.csv +65 -65
  184. teradataml/data/diabetes.csv +443 -443
  185. teradataml/data/diabetes_test.csv +89 -89
  186. teradataml/data/dict_table.csv +5 -5
  187. teradataml/data/docperterm_table.csv +4 -4
  188. teradataml/data/docs/__init__.py +1 -1
  189. teradataml/data/docs/byom/docs/DataRobotPredict.py +180 -180
  190. teradataml/data/docs/byom/docs/DataikuPredict.py +177 -177
  191. teradataml/data/docs/byom/docs/H2OPredict.py +324 -324
  192. teradataml/data/docs/byom/docs/ONNXPredict.py +283 -283
  193. teradataml/data/docs/byom/docs/PMMLPredict.py +277 -277
  194. teradataml/data/docs/sqle/docs_17_10/Antiselect.py +82 -82
  195. teradataml/data/docs/sqle/docs_17_10/Attribution.py +199 -199
  196. teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +171 -171
  197. teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +131 -130
  198. teradataml/data/docs/sqle/docs_17_10/CategoricalSummary.py +86 -86
  199. teradataml/data/docs/sqle/docs_17_10/ChiSq.py +90 -90
  200. teradataml/data/docs/sqle/docs_17_10/ColumnSummary.py +85 -85
  201. teradataml/data/docs/sqle/docs_17_10/ConvertTo.py +95 -95
  202. teradataml/data/docs/sqle/docs_17_10/DecisionForestPredict.py +139 -139
  203. teradataml/data/docs/sqle/docs_17_10/DecisionTreePredict.py +151 -151
  204. teradataml/data/docs/sqle/docs_17_10/FTest.py +160 -160
  205. teradataml/data/docs/sqle/docs_17_10/FillRowId.py +82 -82
  206. teradataml/data/docs/sqle/docs_17_10/Fit.py +87 -87
  207. teradataml/data/docs/sqle/docs_17_10/GLMPredict.py +144 -144
  208. teradataml/data/docs/sqle/docs_17_10/GetRowsWithMissingValues.py +84 -84
  209. teradataml/data/docs/sqle/docs_17_10/GetRowsWithoutMissingValues.py +81 -81
  210. teradataml/data/docs/sqle/docs_17_10/Histogram.py +164 -164
  211. teradataml/data/docs/sqle/docs_17_10/MovingAverage.py +134 -134
  212. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +208 -208
  213. teradataml/data/docs/sqle/docs_17_10/NPath.py +265 -265
  214. teradataml/data/docs/sqle/docs_17_10/NaiveBayesPredict.py +116 -116
  215. teradataml/data/docs/sqle/docs_17_10/NaiveBayesTextClassifierPredict.py +176 -176
  216. teradataml/data/docs/sqle/docs_17_10/NumApply.py +147 -147
  217. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +134 -132
  218. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +109 -103
  219. teradataml/data/docs/sqle/docs_17_10/OutlierFilterFit.py +165 -165
  220. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +105 -101
  221. teradataml/data/docs/sqle/docs_17_10/Pack.py +128 -128
  222. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesFit.py +111 -111
  223. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +102 -102
  224. teradataml/data/docs/sqle/docs_17_10/QQNorm.py +104 -104
  225. teradataml/data/docs/sqle/docs_17_10/RoundColumns.py +109 -109
  226. teradataml/data/docs/sqle/docs_17_10/RowNormalizeFit.py +117 -117
  227. teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +99 -98
  228. teradataml/data/docs/sqle/docs_17_10/SVMSparsePredict.py +152 -152
  229. teradataml/data/docs/sqle/docs_17_10/ScaleFit.py +197 -197
  230. teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +99 -98
  231. teradataml/data/docs/sqle/docs_17_10/Sessionize.py +113 -113
  232. teradataml/data/docs/sqle/docs_17_10/SimpleImputeFit.py +116 -116
  233. teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +98 -98
  234. teradataml/data/docs/sqle/docs_17_10/StrApply.py +187 -187
  235. teradataml/data/docs/sqle/docs_17_10/StringSimilarity.py +145 -145
  236. teradataml/data/docs/sqle/docs_17_10/Transform.py +105 -104
  237. teradataml/data/docs/sqle/docs_17_10/UnivariateStatistics.py +141 -141
  238. teradataml/data/docs/sqle/docs_17_10/Unpack.py +214 -214
  239. teradataml/data/docs/sqle/docs_17_10/WhichMax.py +83 -83
  240. teradataml/data/docs/sqle/docs_17_10/WhichMin.py +83 -83
  241. teradataml/data/docs/sqle/docs_17_10/ZTest.py +155 -155
  242. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +186 -126
  243. teradataml/data/docs/sqle/docs_17_20/Antiselect.py +82 -82
  244. teradataml/data/docs/sqle/docs_17_20/Attribution.py +200 -200
  245. teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +171 -171
  246. teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +139 -138
  247. teradataml/data/docs/sqle/docs_17_20/CategoricalSummary.py +86 -86
  248. teradataml/data/docs/sqle/docs_17_20/ChiSq.py +90 -90
  249. teradataml/data/docs/sqle/docs_17_20/ClassificationEvaluator.py +166 -166
  250. teradataml/data/docs/sqle/docs_17_20/ColumnSummary.py +85 -85
  251. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +245 -243
  252. teradataml/data/docs/sqle/docs_17_20/ConvertTo.py +113 -113
  253. teradataml/data/docs/sqle/docs_17_20/DecisionForest.py +279 -279
  254. teradataml/data/docs/sqle/docs_17_20/DecisionForestPredict.py +144 -144
  255. teradataml/data/docs/sqle/docs_17_20/DecisionTreePredict.py +135 -135
  256. teradataml/data/docs/sqle/docs_17_20/FTest.py +239 -160
  257. teradataml/data/docs/sqle/docs_17_20/FillRowId.py +82 -82
  258. teradataml/data/docs/sqle/docs_17_20/Fit.py +87 -87
  259. teradataml/data/docs/sqle/docs_17_20/GLM.py +541 -380
  260. teradataml/data/docs/sqle/docs_17_20/GLMPerSegment.py +414 -414
  261. teradataml/data/docs/sqle/docs_17_20/GLMPredict.py +144 -144
  262. teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +233 -234
  263. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +125 -123
  264. teradataml/data/docs/sqle/docs_17_20/GetRowsWithMissingValues.py +108 -108
  265. teradataml/data/docs/sqle/docs_17_20/GetRowsWithoutMissingValues.py +105 -105
  266. teradataml/data/docs/sqle/docs_17_20/Histogram.py +223 -223
  267. teradataml/data/docs/sqle/docs_17_20/KMeans.py +251 -204
  268. teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +144 -143
  269. teradataml/data/docs/sqle/docs_17_20/KNN.py +214 -214
  270. teradataml/data/docs/sqle/docs_17_20/MovingAverage.py +134 -134
  271. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +208 -208
  272. teradataml/data/docs/sqle/docs_17_20/NPath.py +265 -265
  273. teradataml/data/docs/sqle/docs_17_20/NaiveBayesPredict.py +116 -116
  274. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +177 -176
  275. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +126 -126
  276. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +118 -117
  277. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +112 -112
  278. teradataml/data/docs/sqle/docs_17_20/NumApply.py +147 -147
  279. teradataml/data/docs/sqle/docs_17_20/OneClassSVM.py +307 -307
  280. teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +185 -184
  281. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +230 -225
  282. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +121 -115
  283. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingFit.py +219 -219
  284. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingTransform.py +127 -127
  285. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +189 -189
  286. teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +117 -112
  287. teradataml/data/docs/sqle/docs_17_20/Pack.py +128 -128
  288. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesFit.py +111 -111
  289. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +112 -111
  290. teradataml/data/docs/sqle/docs_17_20/QQNorm.py +104 -104
  291. teradataml/data/docs/sqle/docs_17_20/ROC.py +164 -163
  292. teradataml/data/docs/sqle/docs_17_20/RandomProjectionFit.py +154 -154
  293. teradataml/data/docs/sqle/docs_17_20/RandomProjectionMinComponents.py +106 -106
  294. teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +120 -120
  295. teradataml/data/docs/sqle/docs_17_20/RegressionEvaluator.py +211 -211
  296. teradataml/data/docs/sqle/docs_17_20/RoundColumns.py +108 -108
  297. teradataml/data/docs/sqle/docs_17_20/RowNormalizeFit.py +117 -117
  298. teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +111 -110
  299. teradataml/data/docs/sqle/docs_17_20/SVM.py +413 -413
  300. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +213 -202
  301. teradataml/data/docs/sqle/docs_17_20/SVMSparsePredict.py +152 -152
  302. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +315 -197
  303. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +202 -109
  304. teradataml/data/docs/sqle/docs_17_20/SentimentExtractor.py +206 -206
  305. teradataml/data/docs/sqle/docs_17_20/Sessionize.py +113 -113
  306. teradataml/data/docs/sqle/docs_17_20/Silhouette.py +152 -152
  307. teradataml/data/docs/sqle/docs_17_20/SimpleImputeFit.py +116 -116
  308. teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +109 -108
  309. teradataml/data/docs/sqle/docs_17_20/StrApply.py +187 -187
  310. teradataml/data/docs/sqle/docs_17_20/StringSimilarity.py +145 -145
  311. teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +207 -207
  312. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +333 -171
  313. teradataml/data/docs/sqle/docs_17_20/TargetEncodingFit.py +266 -266
  314. teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +141 -140
  315. teradataml/data/docs/sqle/docs_17_20/TextParser.py +172 -172
  316. teradataml/data/docs/sqle/docs_17_20/TrainTestSplit.py +159 -159
  317. teradataml/data/docs/sqle/docs_17_20/Transform.py +123 -123
  318. teradataml/data/docs/sqle/docs_17_20/UnivariateStatistics.py +141 -141
  319. teradataml/data/docs/sqle/docs_17_20/Unpack.py +214 -214
  320. teradataml/data/docs/sqle/docs_17_20/VectorDistance.py +168 -168
  321. teradataml/data/docs/sqle/docs_17_20/WhichMax.py +83 -83
  322. teradataml/data/docs/sqle/docs_17_20/WhichMin.py +83 -83
  323. teradataml/data/docs/sqle/docs_17_20/WordEmbeddings.py +236 -236
  324. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +361 -353
  325. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +281 -275
  326. teradataml/data/docs/sqle/docs_17_20/ZTest.py +220 -155
  327. teradataml/data/docs/tableoperator/docs_17_00/ReadNOS.py +429 -429
  328. teradataml/data/docs/tableoperator/docs_17_05/ReadNOS.py +429 -429
  329. teradataml/data/docs/tableoperator/docs_17_05/WriteNOS.py +347 -347
  330. teradataml/data/docs/tableoperator/docs_17_10/ReadNOS.py +428 -428
  331. teradataml/data/docs/tableoperator/docs_17_10/WriteNOS.py +347 -347
  332. teradataml/data/docs/tableoperator/docs_17_20/ReadNOS.py +439 -439
  333. teradataml/data/docs/tableoperator/docs_17_20/WriteNOS.py +386 -386
  334. teradataml/data/docs/uaf/docs_17_20/ACF.py +195 -195
  335. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +369 -369
  336. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +142 -142
  337. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +159 -159
  338. teradataml/data/docs/uaf/docs_17_20/BinaryMatrixOp.py +247 -247
  339. teradataml/data/docs/uaf/docs_17_20/BinarySeriesOp.py +252 -252
  340. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +177 -177
  341. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +174 -174
  342. teradataml/data/docs/uaf/docs_17_20/Convolve.py +226 -226
  343. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +214 -214
  344. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +183 -183
  345. teradataml/data/docs/uaf/docs_17_20/DFFT.py +203 -203
  346. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +216 -216
  347. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +215 -215
  348. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +191 -191
  349. teradataml/data/docs/uaf/docs_17_20/DTW.py +179 -179
  350. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +144 -144
  351. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +183 -183
  352. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +184 -184
  353. teradataml/data/docs/uaf/docs_17_20/FitMetrics.py +172 -172
  354. teradataml/data/docs/uaf/docs_17_20/GenseriesFormula.py +205 -205
  355. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +142 -142
  356. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +258 -258
  357. teradataml/data/docs/uaf/docs_17_20/IDFFT.py +164 -164
  358. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +198 -198
  359. teradataml/data/docs/uaf/docs_17_20/InputValidator.py +120 -120
  360. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +155 -155
  361. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +214 -214
  362. teradataml/data/docs/uaf/docs_17_20/MAMean.py +173 -173
  363. teradataml/data/docs/uaf/docs_17_20/MInfo.py +133 -133
  364. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +135 -135
  365. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +190 -190
  366. teradataml/data/docs/uaf/docs_17_20/PACF.py +158 -158
  367. teradataml/data/docs/uaf/docs_17_20/Portman.py +216 -216
  368. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +154 -154
  369. teradataml/data/docs/uaf/docs_17_20/Resample.py +228 -228
  370. teradataml/data/docs/uaf/docs_17_20/SInfo.py +122 -122
  371. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +165 -165
  372. teradataml/data/docs/uaf/docs_17_20/SelectionCriteria.py +173 -173
  373. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +170 -170
  374. teradataml/data/docs/uaf/docs_17_20/SignifResidmean.py +163 -163
  375. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +179 -179
  376. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +207 -207
  377. teradataml/data/docs/uaf/docs_17_20/TrackingOp.py +150 -150
  378. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +171 -171
  379. teradataml/data/docs/uaf/docs_17_20/Unnormalize.py +201 -201
  380. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +169 -169
  381. teradataml/data/dtw_example.json +17 -17
  382. teradataml/data/dtw_t1.csv +11 -11
  383. teradataml/data/dtw_t2.csv +4 -4
  384. teradataml/data/dwt2d_example.json +15 -15
  385. teradataml/data/dwt_example.json +14 -14
  386. teradataml/data/dwt_filter_dim.csv +5 -5
  387. teradataml/data/emission.csv +9 -9
  388. teradataml/data/emp_table_by_dept.csv +19 -19
  389. teradataml/data/employee_info.csv +4 -4
  390. teradataml/data/employee_table.csv +6 -6
  391. teradataml/data/excluding_event_table.csv +2 -2
  392. teradataml/data/finance_data.csv +6 -6
  393. teradataml/data/finance_data2.csv +61 -61
  394. teradataml/data/finance_data3.csv +93 -93
  395. teradataml/data/fish.csv +160 -0
  396. teradataml/data/fm_blood2ageandweight.csv +26 -26
  397. teradataml/data/fmeasure_example.json +11 -11
  398. teradataml/data/followers_leaders.csv +10 -10
  399. teradataml/data/fpgrowth_example.json +12 -12
  400. teradataml/data/frequentpaths_example.json +29 -29
  401. teradataml/data/friends.csv +9 -9
  402. teradataml/data/fs_input.csv +33 -33
  403. teradataml/data/fs_input1.csv +33 -33
  404. teradataml/data/genData.csv +513 -513
  405. teradataml/data/geodataframe_example.json +39 -39
  406. teradataml/data/glass_types.csv +215 -0
  407. teradataml/data/glm_admissions_model.csv +12 -12
  408. teradataml/data/glm_example.json +56 -29
  409. teradataml/data/glml1l2_example.json +28 -28
  410. teradataml/data/glml1l2predict_example.json +54 -54
  411. teradataml/data/glmpredict_example.json +54 -54
  412. teradataml/data/gq_t1.csv +21 -21
  413. teradataml/data/hconvolve_complex_right.csv +5 -5
  414. teradataml/data/hconvolve_complex_rightmulti.csv +5 -5
  415. teradataml/data/histogram_example.json +11 -11
  416. teradataml/data/hmmdecoder_example.json +78 -78
  417. teradataml/data/hmmevaluator_example.json +24 -24
  418. teradataml/data/hmmsupervised_example.json +10 -10
  419. teradataml/data/hmmunsupervised_example.json +7 -7
  420. teradataml/data/house_values.csv +12 -12
  421. teradataml/data/house_values2.csv +13 -13
  422. teradataml/data/housing_cat.csv +7 -7
  423. teradataml/data/housing_data.csv +9 -9
  424. teradataml/data/housing_test.csv +47 -47
  425. teradataml/data/housing_test_binary.csv +47 -47
  426. teradataml/data/housing_train.csv +493 -493
  427. teradataml/data/housing_train_attribute.csv +4 -4
  428. teradataml/data/housing_train_binary.csv +437 -437
  429. teradataml/data/housing_train_parameter.csv +2 -2
  430. teradataml/data/housing_train_response.csv +493 -493
  431. teradataml/data/housing_train_segment.csv +201 -0
  432. teradataml/data/ibm_stock.csv +370 -370
  433. teradataml/data/ibm_stock1.csv +370 -370
  434. teradataml/data/identitymatch_example.json +21 -21
  435. teradataml/data/idf_table.csv +4 -4
  436. teradataml/data/impressions.csv +101 -101
  437. teradataml/data/inflation.csv +21 -21
  438. teradataml/data/initial.csv +3 -3
  439. teradataml/data/insect2Cols.csv +61 -0
  440. teradataml/data/insect_sprays.csv +12 -12
  441. teradataml/data/insurance.csv +1339 -1339
  442. teradataml/data/interpolator_example.json +12 -12
  443. teradataml/data/iris_altinput.csv +481 -481
  444. teradataml/data/iris_attribute_output.csv +8 -8
  445. teradataml/data/iris_attribute_test.csv +121 -121
  446. teradataml/data/iris_attribute_train.csv +481 -481
  447. teradataml/data/iris_category_expect_predict.csv +31 -31
  448. teradataml/data/iris_data.csv +151 -0
  449. teradataml/data/iris_input.csv +151 -151
  450. teradataml/data/iris_response_train.csv +121 -121
  451. teradataml/data/iris_test.csv +31 -31
  452. teradataml/data/iris_train.csv +121 -121
  453. teradataml/data/join_table1.csv +4 -4
  454. teradataml/data/join_table2.csv +4 -4
  455. teradataml/data/jsons/anly_function_name.json +6 -6
  456. teradataml/data/jsons/byom/dataikupredict.json +147 -147
  457. teradataml/data/jsons/byom/datarobotpredict.json +146 -146
  458. teradataml/data/jsons/byom/h2opredict.json +194 -194
  459. teradataml/data/jsons/byom/onnxpredict.json +186 -186
  460. teradataml/data/jsons/byom/pmmlpredict.json +146 -146
  461. teradataml/data/jsons/paired_functions.json +435 -435
  462. teradataml/data/jsons/sqle/16.20/Antiselect.json +56 -56
  463. teradataml/data/jsons/sqle/16.20/Attribution.json +249 -249
  464. teradataml/data/jsons/sqle/16.20/DecisionForestPredict.json +156 -156
  465. teradataml/data/jsons/sqle/16.20/DecisionTreePredict.json +170 -170
  466. teradataml/data/jsons/sqle/16.20/GLMPredict.json +122 -122
  467. teradataml/data/jsons/sqle/16.20/MovingAverage.json +367 -367
  468. teradataml/data/jsons/sqle/16.20/NGramSplitter.json +239 -239
  469. teradataml/data/jsons/sqle/16.20/NaiveBayesPredict.json +136 -136
  470. teradataml/data/jsons/sqle/16.20/NaiveBayesTextClassifierPredict.json +235 -235
  471. teradataml/data/jsons/sqle/16.20/Pack.json +98 -98
  472. teradataml/data/jsons/sqle/16.20/SVMSparsePredict.json +162 -162
  473. teradataml/data/jsons/sqle/16.20/Sessionize.json +105 -105
  474. teradataml/data/jsons/sqle/16.20/StringSimilarity.json +86 -86
  475. teradataml/data/jsons/sqle/16.20/Unpack.json +166 -166
  476. teradataml/data/jsons/sqle/16.20/nPath.json +269 -269
  477. teradataml/data/jsons/sqle/17.00/Antiselect.json +56 -56
  478. teradataml/data/jsons/sqle/17.00/Attribution.json +249 -249
  479. teradataml/data/jsons/sqle/17.00/DecisionForestPredict.json +156 -156
  480. teradataml/data/jsons/sqle/17.00/DecisionTreePredict.json +170 -170
  481. teradataml/data/jsons/sqle/17.00/GLMPredict.json +122 -122
  482. teradataml/data/jsons/sqle/17.00/MovingAverage.json +367 -367
  483. teradataml/data/jsons/sqle/17.00/NGramSplitter.json +239 -239
  484. teradataml/data/jsons/sqle/17.00/NaiveBayesPredict.json +136 -136
  485. teradataml/data/jsons/sqle/17.00/NaiveBayesTextClassifierPredict.json +235 -235
  486. teradataml/data/jsons/sqle/17.00/Pack.json +98 -98
  487. teradataml/data/jsons/sqle/17.00/SVMSparsePredict.json +162 -162
  488. teradataml/data/jsons/sqle/17.00/Sessionize.json +105 -105
  489. teradataml/data/jsons/sqle/17.00/StringSimilarity.json +86 -86
  490. teradataml/data/jsons/sqle/17.00/Unpack.json +166 -166
  491. teradataml/data/jsons/sqle/17.00/nPath.json +269 -269
  492. teradataml/data/jsons/sqle/17.05/Antiselect.json +56 -56
  493. teradataml/data/jsons/sqle/17.05/Attribution.json +249 -249
  494. teradataml/data/jsons/sqle/17.05/DecisionForestPredict.json +156 -156
  495. teradataml/data/jsons/sqle/17.05/DecisionTreePredict.json +170 -170
  496. teradataml/data/jsons/sqle/17.05/GLMPredict.json +122 -122
  497. teradataml/data/jsons/sqle/17.05/MovingAverage.json +367 -367
  498. teradataml/data/jsons/sqle/17.05/NGramSplitter.json +239 -239
  499. teradataml/data/jsons/sqle/17.05/NaiveBayesPredict.json +136 -136
  500. teradataml/data/jsons/sqle/17.05/NaiveBayesTextClassifierPredict.json +235 -235
  501. teradataml/data/jsons/sqle/17.05/Pack.json +98 -98
  502. teradataml/data/jsons/sqle/17.05/SVMSparsePredict.json +162 -162
  503. teradataml/data/jsons/sqle/17.05/Sessionize.json +105 -105
  504. teradataml/data/jsons/sqle/17.05/StringSimilarity.json +86 -86
  505. teradataml/data/jsons/sqle/17.05/Unpack.json +166 -166
  506. teradataml/data/jsons/sqle/17.05/nPath.json +269 -269
  507. teradataml/data/jsons/sqle/17.10/Antiselect.json +56 -56
  508. teradataml/data/jsons/sqle/17.10/Attribution.json +249 -249
  509. teradataml/data/jsons/sqle/17.10/DecisionForestPredict.json +185 -185
  510. teradataml/data/jsons/sqle/17.10/DecisionTreePredict.json +171 -171
  511. teradataml/data/jsons/sqle/17.10/GLMPredict.json +151 -151
  512. teradataml/data/jsons/sqle/17.10/MovingAverage.json +368 -368
  513. teradataml/data/jsons/sqle/17.10/NGramSplitter.json +239 -239
  514. teradataml/data/jsons/sqle/17.10/NaiveBayesPredict.json +149 -149
  515. teradataml/data/jsons/sqle/17.10/NaiveBayesTextClassifierPredict.json +288 -288
  516. teradataml/data/jsons/sqle/17.10/Pack.json +133 -133
  517. teradataml/data/jsons/sqle/17.10/SVMSparsePredict.json +193 -193
  518. teradataml/data/jsons/sqle/17.10/Sessionize.json +105 -105
  519. teradataml/data/jsons/sqle/17.10/StringSimilarity.json +86 -86
  520. teradataml/data/jsons/sqle/17.10/TD_BinCodeFit.json +239 -239
  521. teradataml/data/jsons/sqle/17.10/TD_BinCodeTransform.json +70 -70
  522. teradataml/data/jsons/sqle/17.10/TD_CategoricalSummary.json +53 -53
  523. teradataml/data/jsons/sqle/17.10/TD_Chisq.json +67 -67
  524. teradataml/data/jsons/sqle/17.10/TD_ColumnSummary.json +53 -53
  525. teradataml/data/jsons/sqle/17.10/TD_ConvertTo.json +68 -68
  526. teradataml/data/jsons/sqle/17.10/TD_FTest.json +187 -187
  527. teradataml/data/jsons/sqle/17.10/TD_FillRowID.json +51 -51
  528. teradataml/data/jsons/sqle/17.10/TD_FunctionFit.json +46 -46
  529. teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +72 -71
  530. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithMissingValues.json +52 -52
  531. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithoutMissingValues.json +52 -52
  532. teradataml/data/jsons/sqle/17.10/TD_Histogram.json +132 -132
  533. teradataml/data/jsons/sqle/17.10/TD_NumApply.json +147 -147
  534. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingFit.json +182 -182
  535. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +65 -64
  536. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterFit.json +196 -196
  537. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +48 -47
  538. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesFit.json +114 -114
  539. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +72 -71
  540. teradataml/data/jsons/sqle/17.10/TD_QQNorm.json +111 -111
  541. teradataml/data/jsons/sqle/17.10/TD_RoundColumns.json +93 -93
  542. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeFit.json +127 -127
  543. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +70 -69
  544. teradataml/data/jsons/sqle/17.10/TD_ScaleFit.json +156 -156
  545. teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +70 -69
  546. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeFit.json +147 -147
  547. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +48 -47
  548. teradataml/data/jsons/sqle/17.10/TD_StrApply.json +240 -240
  549. teradataml/data/jsons/sqle/17.10/TD_UnivariateStatistics.json +118 -118
  550. teradataml/data/jsons/sqle/17.10/TD_WhichMax.json +52 -52
  551. teradataml/data/jsons/sqle/17.10/TD_WhichMin.json +52 -52
  552. teradataml/data/jsons/sqle/17.10/TD_ZTest.json +171 -171
  553. teradataml/data/jsons/sqle/17.10/Unpack.json +188 -188
  554. teradataml/data/jsons/sqle/17.10/nPath.json +269 -269
  555. teradataml/data/jsons/sqle/17.20/Antiselect.json +56 -56
  556. teradataml/data/jsons/sqle/17.20/Attribution.json +249 -249
  557. teradataml/data/jsons/sqle/17.20/DecisionForestPredict.json +185 -185
  558. teradataml/data/jsons/sqle/17.20/DecisionTreePredict.json +172 -172
  559. teradataml/data/jsons/sqle/17.20/GLMPredict.json +151 -151
  560. teradataml/data/jsons/sqle/17.20/MovingAverage.json +367 -367
  561. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +239 -239
  562. teradataml/data/jsons/sqle/17.20/NaiveBayesPredict.json +149 -149
  563. teradataml/data/jsons/sqle/17.20/NaiveBayesTextClassifierPredict.json +287 -287
  564. teradataml/data/jsons/sqle/17.20/Pack.json +133 -133
  565. teradataml/data/jsons/sqle/17.20/SVMSparsePredict.json +192 -192
  566. teradataml/data/jsons/sqle/17.20/Sessionize.json +105 -105
  567. teradataml/data/jsons/sqle/17.20/StringSimilarity.json +86 -86
  568. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +148 -76
  569. teradataml/data/jsons/sqle/17.20/TD_BinCodeFit.json +239 -239
  570. teradataml/data/jsons/sqle/17.20/TD_BinCodeTransform.json +71 -71
  571. teradataml/data/jsons/sqle/17.20/TD_CategoricalSummary.json +53 -53
  572. teradataml/data/jsons/sqle/17.20/TD_Chisq.json +67 -67
  573. teradataml/data/jsons/sqle/17.20/TD_ClassificationEvaluator.json +145 -145
  574. teradataml/data/jsons/sqle/17.20/TD_ColumnSummary.json +53 -53
  575. teradataml/data/jsons/sqle/17.20/TD_ColumnTransformer.json +218 -218
  576. teradataml/data/jsons/sqle/17.20/TD_ConvertTo.json +92 -92
  577. teradataml/data/jsons/sqle/17.20/TD_DecisionForest.json +259 -259
  578. teradataml/data/jsons/sqle/17.20/TD_DecisionForestPredict.json +139 -139
  579. teradataml/data/jsons/sqle/17.20/TD_FTest.json +269 -186
  580. teradataml/data/jsons/sqle/17.20/TD_FillRowID.json +52 -52
  581. teradataml/data/jsons/sqle/17.20/TD_FunctionFit.json +46 -46
  582. teradataml/data/jsons/sqle/17.20/TD_FunctionTransform.json +72 -72
  583. teradataml/data/jsons/sqle/17.20/TD_GLM.json +507 -431
  584. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +168 -125
  585. teradataml/data/jsons/sqle/17.20/TD_GLMPerSegment.json +411 -411
  586. teradataml/data/jsons/sqle/17.20/TD_GLMPredictPerSegment.json +146 -146
  587. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +93 -91
  588. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithMissingValues.json +76 -76
  589. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithoutMissingValues.json +76 -76
  590. teradataml/data/jsons/sqle/17.20/TD_Histogram.json +152 -152
  591. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +231 -211
  592. teradataml/data/jsons/sqle/17.20/TD_KMeansPredict.json +86 -86
  593. teradataml/data/jsons/sqle/17.20/TD_KNN.json +262 -262
  594. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesTextClassifierTrainer.json +137 -137
  595. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +102 -101
  596. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineTransform.json +71 -71
  597. teradataml/data/jsons/sqle/17.20/TD_NumApply.json +147 -147
  598. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +315 -315
  599. teradataml/data/jsons/sqle/17.20/TD_OneClassSVMPredict.json +123 -123
  600. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingFit.json +271 -271
  601. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingTransform.json +65 -65
  602. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingFit.json +229 -229
  603. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingTransform.json +75 -75
  604. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterFit.json +217 -217
  605. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterTransform.json +48 -48
  606. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesFit.json +114 -114
  607. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesTransform.json +72 -72
  608. teradataml/data/jsons/sqle/17.20/TD_QQNorm.json +111 -111
  609. teradataml/data/jsons/sqle/17.20/TD_ROC.json +178 -177
  610. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionFit.json +178 -178
  611. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionMinComponents.json +73 -73
  612. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionTransform.json +74 -74
  613. teradataml/data/jsons/sqle/17.20/TD_RegressionEvaluator.json +137 -137
  614. teradataml/data/jsons/sqle/17.20/TD_RoundColumns.json +93 -93
  615. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeFit.json +127 -127
  616. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeTransform.json +70 -70
  617. teradataml/data/jsons/sqle/17.20/TD_SVM.json +389 -389
  618. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +142 -124
  619. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +309 -156
  620. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +119 -70
  621. teradataml/data/jsons/sqle/17.20/TD_SentimentExtractor.json +193 -193
  622. teradataml/data/jsons/sqle/17.20/TD_Silhouette.json +142 -142
  623. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeFit.json +147 -147
  624. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeTransform.json +48 -48
  625. teradataml/data/jsons/sqle/17.20/TD_StrApply.json +240 -240
  626. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingFit.json +248 -248
  627. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingTransform.json +75 -75
  628. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +192 -192
  629. teradataml/data/jsons/sqle/17.20/TD_TrainTestSplit.json +142 -142
  630. teradataml/data/jsons/sqle/17.20/TD_UnivariateStatistics.json +117 -117
  631. teradataml/data/jsons/sqle/17.20/TD_VectorDistance.json +182 -182
  632. teradataml/data/jsons/sqle/17.20/TD_WhichMax.json +52 -52
  633. teradataml/data/jsons/sqle/17.20/TD_WhichMin.json +52 -52
  634. teradataml/data/jsons/sqle/17.20/TD_WordEmbeddings.json +241 -241
  635. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +330 -312
  636. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +195 -182
  637. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +247 -170
  638. teradataml/data/jsons/sqle/17.20/Unpack.json +188 -188
  639. teradataml/data/jsons/sqle/17.20/nPath.json +269 -269
  640. teradataml/data/jsons/tableoperator/17.00/read_nos.json +197 -197
  641. teradataml/data/jsons/tableoperator/17.05/read_nos.json +197 -197
  642. teradataml/data/jsons/tableoperator/17.05/write_nos.json +194 -194
  643. teradataml/data/jsons/tableoperator/17.10/read_nos.json +183 -183
  644. teradataml/data/jsons/tableoperator/17.10/write_nos.json +194 -194
  645. teradataml/data/jsons/tableoperator/17.20/read_nos.json +182 -182
  646. teradataml/data/jsons/tableoperator/17.20/write_nos.json +223 -223
  647. teradataml/data/jsons/uaf/17.20/TD_ACF.json +149 -149
  648. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +409 -409
  649. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +79 -79
  650. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +151 -151
  651. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +109 -109
  652. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +107 -107
  653. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +87 -87
  654. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +106 -106
  655. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +80 -80
  656. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +67 -67
  657. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +91 -91
  658. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +136 -136
  659. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +148 -148
  660. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +108 -108
  661. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +109 -109
  662. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +86 -86
  663. teradataml/data/jsons/uaf/17.20/TD_DIFF.json +91 -91
  664. teradataml/data/jsons/uaf/17.20/TD_DTW.json +116 -116
  665. teradataml/data/jsons/uaf/17.20/TD_DURBIN_WATSON.json +100 -100
  666. teradataml/data/jsons/uaf/17.20/TD_EXTRACT_RESULTS.json +38 -38
  667. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +100 -100
  668. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4FORMULA.json +84 -84
  669. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4SINUSOIDS.json +70 -70
  670. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +152 -152
  671. teradataml/data/jsons/uaf/17.20/TD_HOLT_WINTERS_FORECAST.json +313 -313
  672. teradataml/data/jsons/uaf/17.20/TD_IDFFT.json +57 -57
  673. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +94 -94
  674. teradataml/data/jsons/uaf/17.20/TD_INPUTVALIDATOR.json +63 -63
  675. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +181 -181
  676. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +102 -102
  677. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +182 -182
  678. teradataml/data/jsons/uaf/17.20/TD_MATRIXMULTIPLY.json +67 -67
  679. teradataml/data/jsons/uaf/17.20/TD_MINFO.json +66 -66
  680. teradataml/data/jsons/uaf/17.20/TD_MULTIVAR_REGR.json +178 -178
  681. teradataml/data/jsons/uaf/17.20/TD_PACF.json +114 -114
  682. teradataml/data/jsons/uaf/17.20/TD_PORTMAN.json +118 -118
  683. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +175 -175
  684. teradataml/data/jsons/uaf/17.20/TD_POWERTRANSFORM.json +97 -97
  685. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +173 -173
  686. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +136 -136
  687. teradataml/data/jsons/uaf/17.20/TD_SELECTION_CRITERIA.json +89 -89
  688. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_PERIODICITIES.json +79 -79
  689. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_RESIDMEAN.json +67 -67
  690. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +184 -184
  691. teradataml/data/jsons/uaf/17.20/TD_SINFO.json +57 -57
  692. teradataml/data/jsons/uaf/17.20/TD_SMOOTHMA.json +162 -162
  693. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +100 -100
  694. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +111 -111
  695. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +95 -95
  696. teradataml/data/jsons/uaf/17.20/TD_WHITES_GENERAL.json +77 -77
  697. teradataml/data/kmeans_example.json +22 -17
  698. teradataml/data/kmeans_table.csv +10 -0
  699. teradataml/data/kmeans_us_arrests_data.csv +0 -0
  700. teradataml/data/knn_example.json +18 -18
  701. teradataml/data/knnrecommender_example.json +6 -6
  702. teradataml/data/knnrecommenderpredict_example.json +12 -12
  703. teradataml/data/lar_example.json +17 -17
  704. teradataml/data/larpredict_example.json +30 -30
  705. teradataml/data/lc_new_predictors.csv +5 -5
  706. teradataml/data/lc_new_reference.csv +9 -9
  707. teradataml/data/lda_example.json +8 -8
  708. teradataml/data/ldainference_example.json +14 -14
  709. teradataml/data/ldatopicsummary_example.json +8 -8
  710. teradataml/data/levendist_input.csv +13 -13
  711. teradataml/data/levenshteindistance_example.json +10 -10
  712. teradataml/data/linreg_example.json +9 -9
  713. teradataml/data/load_example_data.py +326 -323
  714. teradataml/data/loan_prediction.csv +295 -295
  715. teradataml/data/lungcancer.csv +138 -138
  716. teradataml/data/mappingdata.csv +12 -12
  717. teradataml/data/milk_timeseries.csv +157 -157
  718. teradataml/data/min_max_titanic.csv +4 -4
  719. teradataml/data/minhash_example.json +6 -6
  720. teradataml/data/ml_ratings.csv +7547 -7547
  721. teradataml/data/ml_ratings_10.csv +2445 -2445
  722. teradataml/data/model1_table.csv +5 -5
  723. teradataml/data/model2_table.csv +5 -5
  724. teradataml/data/models/iris_db_glm_model.pmml +56 -56
  725. teradataml/data/models/iris_db_xgb_model.pmml +4471 -4471
  726. teradataml/data/modularity_example.json +12 -12
  727. teradataml/data/movavg_example.json +7 -7
  728. teradataml/data/mtx1.csv +7 -7
  729. teradataml/data/mtx2.csv +13 -13
  730. teradataml/data/multi_model_classification.csv +401 -0
  731. teradataml/data/multi_model_regression.csv +401 -0
  732. teradataml/data/mvdfft8.csv +9 -9
  733. teradataml/data/naivebayes_example.json +9 -9
  734. teradataml/data/naivebayespredict_example.json +19 -19
  735. teradataml/data/naivebayestextclassifier2_example.json +6 -6
  736. teradataml/data/naivebayestextclassifier_example.json +8 -8
  737. teradataml/data/naivebayestextclassifierpredict_example.json +20 -20
  738. teradataml/data/name_Find_configure.csv +10 -10
  739. teradataml/data/namedentityfinder_example.json +14 -14
  740. teradataml/data/namedentityfinderevaluator_example.json +10 -10
  741. teradataml/data/namedentityfindertrainer_example.json +6 -6
  742. teradataml/data/nb_iris_input_test.csv +31 -31
  743. teradataml/data/nb_iris_input_train.csv +121 -121
  744. teradataml/data/nbp_iris_model.csv +13 -13
  745. teradataml/data/ner_extractor_text.csv +2 -2
  746. teradataml/data/ner_sports_test2.csv +29 -29
  747. teradataml/data/ner_sports_train.csv +501 -501
  748. teradataml/data/nerevaluator_example.json +5 -5
  749. teradataml/data/nerextractor_example.json +18 -18
  750. teradataml/data/nermem_sports_test.csv +17 -17
  751. teradataml/data/nermem_sports_train.csv +50 -50
  752. teradataml/data/nertrainer_example.json +6 -6
  753. teradataml/data/ngrams_example.json +6 -6
  754. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Aggregate Functions using SQLAlchemy.ipynb +1455 -1455
  755. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Arithmetic Functions Using SQLAlchemy.ipynb +1993 -1993
  756. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Bit-Byte Manipulation Functions using SQLAlchemy.ipynb +1492 -1492
  757. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Built-in functions using SQLAlchemy.ipynb +536 -536
  758. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Regular Expressions Using SQLAlchemy.ipynb +570 -570
  759. teradataml/data/notebooks/sqlalchemy/Teradata Vantage String Functions Using SQLAlchemy.ipynb +2559 -2559
  760. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Window Aggregate Functions using SQLAlchemy.ipynb +2911 -2911
  761. teradataml/data/notebooks/sqlalchemy/Using Generic SQLAlchemy ClauseElements teradataml DataFrame assign method.ipynb +698 -698
  762. teradataml/data/notebooks/sqlalchemy/teradataml filtering using SQLAlchemy ClauseElements.ipynb +784 -784
  763. teradataml/data/npath_example.json +23 -23
  764. teradataml/data/ntree_example.json +14 -14
  765. teradataml/data/numeric_strings.csv +4 -4
  766. teradataml/data/numerics.csv +4 -4
  767. teradataml/data/ocean_buoy.csv +17 -17
  768. teradataml/data/ocean_buoy2.csv +17 -17
  769. teradataml/data/ocean_buoys.csv +27 -27
  770. teradataml/data/ocean_buoys2.csv +10 -10
  771. teradataml/data/ocean_buoys_nonpti.csv +28 -28
  772. teradataml/data/ocean_buoys_seq.csv +29 -29
  773. teradataml/data/onehot_encoder_train.csv +4 -0
  774. teradataml/data/openml_example.json +92 -0
  775. teradataml/data/optional_event_table.csv +4 -4
  776. teradataml/data/orders1.csv +11 -11
  777. teradataml/data/orders1_12.csv +12 -12
  778. teradataml/data/orders_ex.csv +4 -4
  779. teradataml/data/pack_example.json +8 -8
  780. teradataml/data/package_tracking.csv +19 -19
  781. teradataml/data/package_tracking_pti.csv +18 -18
  782. teradataml/data/pagerank_example.json +13 -13
  783. teradataml/data/paragraphs_input.csv +6 -6
  784. teradataml/data/pathanalyzer_example.json +7 -7
  785. teradataml/data/pathgenerator_example.json +7 -7
  786. teradataml/data/phrases.csv +7 -7
  787. teradataml/data/pivot_example.json +8 -8
  788. teradataml/data/pivot_input.csv +22 -22
  789. teradataml/data/playerRating.csv +31 -31
  790. teradataml/data/postagger_example.json +6 -6
  791. teradataml/data/posttagger_output.csv +44 -44
  792. teradataml/data/production_data.csv +16 -16
  793. teradataml/data/production_data2.csv +7 -7
  794. teradataml/data/randomsample_example.json +31 -31
  795. teradataml/data/randomwalksample_example.json +8 -8
  796. teradataml/data/rank_table.csv +6 -6
  797. teradataml/data/ref_mobile_data.csv +4 -4
  798. teradataml/data/ref_mobile_data_dense.csv +2 -2
  799. teradataml/data/ref_url.csv +17 -17
  800. teradataml/data/restaurant_reviews.csv +7 -7
  801. teradataml/data/river_data.csv +145 -145
  802. teradataml/data/roc_example.json +7 -7
  803. teradataml/data/roc_input.csv +101 -101
  804. teradataml/data/rule_inputs.csv +6 -6
  805. teradataml/data/rule_table.csv +2 -2
  806. teradataml/data/sales.csv +7 -7
  807. teradataml/data/sales_transaction.csv +501 -501
  808. teradataml/data/salesdata.csv +342 -342
  809. teradataml/data/sample_cities.csv +2 -2
  810. teradataml/data/sample_shapes.csv +10 -10
  811. teradataml/data/sample_streets.csv +2 -2
  812. teradataml/data/sampling_example.json +15 -15
  813. teradataml/data/sax_example.json +8 -8
  814. teradataml/data/scale_attributes.csv +3 -0
  815. teradataml/data/scale_example.json +74 -23
  816. teradataml/data/scale_housing.csv +11 -11
  817. teradataml/data/scale_housing_test.csv +6 -6
  818. teradataml/data/scale_input_part_sparse.csv +31 -0
  819. teradataml/data/scale_input_partitioned.csv +16 -0
  820. teradataml/data/scale_input_sparse.csv +11 -0
  821. teradataml/data/scale_parameters.csv +3 -0
  822. teradataml/data/scale_stat.csv +11 -11
  823. teradataml/data/scalebypartition_example.json +13 -13
  824. teradataml/data/scalemap_example.json +13 -13
  825. teradataml/data/scalesummary_example.json +12 -12
  826. teradataml/data/score_category.csv +101 -101
  827. teradataml/data/score_summary.csv +4 -4
  828. teradataml/data/script_example.json +9 -9
  829. teradataml/data/scripts/deploy_script.py +84 -0
  830. teradataml/data/scripts/mapper.R +20 -0
  831. teradataml/data/scripts/mapper.py +15 -15
  832. teradataml/data/scripts/mapper_replace.py +15 -15
  833. teradataml/data/scripts/sklearn/__init__.py +0 -0
  834. teradataml/data/scripts/sklearn/sklearn_fit.py +171 -0
  835. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +127 -0
  836. teradataml/data/scripts/sklearn/sklearn_function.template +108 -0
  837. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +148 -0
  838. teradataml/data/scripts/sklearn/sklearn_neighbors.py +143 -0
  839. teradataml/data/scripts/sklearn/sklearn_score.py +119 -0
  840. teradataml/data/scripts/sklearn/sklearn_transform.py +171 -0
  841. teradataml/data/seeds.csv +10 -10
  842. teradataml/data/sentenceextractor_example.json +6 -6
  843. teradataml/data/sentiment_extract_input.csv +11 -11
  844. teradataml/data/sentiment_train.csv +16 -16
  845. teradataml/data/sentiment_word.csv +20 -20
  846. teradataml/data/sentiment_word_input.csv +19 -19
  847. teradataml/data/sentimentextractor_example.json +24 -24
  848. teradataml/data/sentimenttrainer_example.json +8 -8
  849. teradataml/data/sequence_table.csv +10 -10
  850. teradataml/data/seriessplitter_example.json +7 -7
  851. teradataml/data/sessionize_example.json +17 -17
  852. teradataml/data/sessionize_table.csv +116 -116
  853. teradataml/data/setop_test1.csv +24 -24
  854. teradataml/data/setop_test2.csv +22 -22
  855. teradataml/data/soc_nw_edges.csv +10 -10
  856. teradataml/data/soc_nw_vertices.csv +7 -7
  857. teradataml/data/souvenir_timeseries.csv +167 -167
  858. teradataml/data/sparse_iris_attribute.csv +5 -5
  859. teradataml/data/sparse_iris_test.csv +121 -121
  860. teradataml/data/sparse_iris_train.csv +601 -601
  861. teradataml/data/star1.csv +6 -6
  862. teradataml/data/state_transition.csv +5 -5
  863. teradataml/data/stock_data.csv +53 -53
  864. teradataml/data/stock_movement.csv +11 -11
  865. teradataml/data/stock_vol.csv +76 -76
  866. teradataml/data/stop_words.csv +8 -8
  867. teradataml/data/store_sales.csv +37 -37
  868. teradataml/data/stringsimilarity_example.json +7 -7
  869. teradataml/data/strsimilarity_input.csv +13 -13
  870. teradataml/data/students.csv +101 -101
  871. teradataml/data/svm_iris_input_test.csv +121 -121
  872. teradataml/data/svm_iris_input_train.csv +481 -481
  873. teradataml/data/svm_iris_model.csv +7 -7
  874. teradataml/data/svmdense_example.json +9 -9
  875. teradataml/data/svmdensepredict_example.json +18 -18
  876. teradataml/data/svmsparse_example.json +7 -7
  877. teradataml/data/svmsparsepredict_example.json +13 -13
  878. teradataml/data/svmsparsesummary_example.json +7 -7
  879. teradataml/data/target_mobile_data.csv +13 -13
  880. teradataml/data/target_mobile_data_dense.csv +5 -5
  881. teradataml/data/templatedata.csv +1201 -1201
  882. teradataml/data/templates/open_source_ml.json +9 -0
  883. teradataml/data/teradataml_example.json +150 -1
  884. teradataml/data/test_classification.csv +101 -0
  885. teradataml/data/test_loan_prediction.csv +53 -53
  886. teradataml/data/test_pacf_12.csv +37 -37
  887. teradataml/data/test_prediction.csv +101 -0
  888. teradataml/data/test_regression.csv +101 -0
  889. teradataml/data/test_river2.csv +109 -109
  890. teradataml/data/text_inputs.csv +6 -6
  891. teradataml/data/textchunker_example.json +7 -7
  892. teradataml/data/textclassifier_example.json +6 -6
  893. teradataml/data/textclassifier_input.csv +7 -7
  894. teradataml/data/textclassifiertrainer_example.json +6 -6
  895. teradataml/data/textmorph_example.json +5 -5
  896. teradataml/data/textparser_example.json +15 -15
  897. teradataml/data/texttagger_example.json +11 -11
  898. teradataml/data/texttokenizer_example.json +6 -6
  899. teradataml/data/texttrainer_input.csv +11 -11
  900. teradataml/data/tf_example.json +6 -6
  901. teradataml/data/tfidf_example.json +13 -13
  902. teradataml/data/tfidf_input1.csv +201 -201
  903. teradataml/data/tfidf_train.csv +6 -6
  904. teradataml/data/time_table1.csv +535 -535
  905. teradataml/data/time_table2.csv +14 -14
  906. teradataml/data/timeseriesdata.csv +1601 -1601
  907. teradataml/data/timeseriesdatasetsd4.csv +105 -105
  908. teradataml/data/titanic.csv +892 -892
  909. teradataml/data/token_table.csv +696 -696
  910. teradataml/data/train_multiclass.csv +101 -0
  911. teradataml/data/train_regression.csv +101 -0
  912. teradataml/data/train_regression_multiple_labels.csv +101 -0
  913. teradataml/data/train_tracking.csv +27 -27
  914. teradataml/data/transformation_table.csv +5 -5
  915. teradataml/data/transformation_table_new.csv +1 -1
  916. teradataml/data/tv_spots.csv +16 -16
  917. teradataml/data/twod_climate_data.csv +117 -117
  918. teradataml/data/uaf_example.json +475 -475
  919. teradataml/data/univariatestatistics_example.json +8 -8
  920. teradataml/data/unpack_example.json +9 -9
  921. teradataml/data/unpivot_example.json +9 -9
  922. teradataml/data/unpivot_input.csv +8 -8
  923. teradataml/data/us_air_pass.csv +36 -36
  924. teradataml/data/us_population.csv +624 -624
  925. teradataml/data/us_states_shapes.csv +52 -52
  926. teradataml/data/varmax_example.json +17 -17
  927. teradataml/data/vectordistance_example.json +25 -25
  928. teradataml/data/ville_climatedata.csv +121 -121
  929. teradataml/data/ville_tempdata.csv +12 -12
  930. teradataml/data/ville_tempdata1.csv +12 -12
  931. teradataml/data/ville_temperature.csv +11 -11
  932. teradataml/data/waveletTable.csv +1605 -1605
  933. teradataml/data/waveletTable2.csv +1605 -1605
  934. teradataml/data/weightedmovavg_example.json +8 -8
  935. teradataml/data/wft_testing.csv +5 -5
  936. teradataml/data/wine_data.csv +1600 -0
  937. teradataml/data/word_embed_input_table1.csv +5 -5
  938. teradataml/data/word_embed_input_table2.csv +4 -4
  939. teradataml/data/word_embed_model.csv +22 -22
  940. teradataml/data/words_input.csv +13 -13
  941. teradataml/data/xconvolve_complex_left.csv +6 -6
  942. teradataml/data/xconvolve_complex_leftmulti.csv +6 -6
  943. teradataml/data/xgboost_example.json +35 -35
  944. teradataml/data/xgboostpredict_example.json +31 -31
  945. teradataml/data/ztest_example.json +16 -0
  946. teradataml/dataframe/copy_to.py +1769 -1698
  947. teradataml/dataframe/data_transfer.py +2812 -2745
  948. teradataml/dataframe/dataframe.py +17630 -16946
  949. teradataml/dataframe/dataframe_utils.py +1875 -1740
  950. teradataml/dataframe/fastload.py +794 -603
  951. teradataml/dataframe/indexer.py +424 -424
  952. teradataml/dataframe/setop.py +1179 -1166
  953. teradataml/dataframe/sql.py +10174 -6432
  954. teradataml/dataframe/sql_function_parameters.py +439 -388
  955. teradataml/dataframe/sql_functions.py +652 -652
  956. teradataml/dataframe/sql_interfaces.py +220 -220
  957. teradataml/dataframe/vantage_function_types.py +674 -630
  958. teradataml/dataframe/window.py +693 -692
  959. teradataml/dbutils/__init__.py +3 -3
  960. teradataml/dbutils/dbutils.py +1167 -1150
  961. teradataml/dbutils/filemgr.py +267 -267
  962. teradataml/gen_ai/__init__.py +2 -2
  963. teradataml/gen_ai/convAI.py +472 -472
  964. teradataml/geospatial/__init__.py +3 -3
  965. teradataml/geospatial/geodataframe.py +1105 -1094
  966. teradataml/geospatial/geodataframecolumn.py +392 -387
  967. teradataml/geospatial/geometry_types.py +925 -925
  968. teradataml/hyperparameter_tuner/__init__.py +1 -1
  969. teradataml/hyperparameter_tuner/optimizer.py +3783 -2993
  970. teradataml/hyperparameter_tuner/utils.py +281 -187
  971. teradataml/lib/aed_0_1.dll +0 -0
  972. teradataml/lib/libaed_0_1.dylib +0 -0
  973. teradataml/lib/libaed_0_1.so +0 -0
  974. teradataml/libaed_0_1.dylib +0 -0
  975. teradataml/libaed_0_1.so +0 -0
  976. teradataml/opensource/__init__.py +1 -0
  977. teradataml/opensource/sklearn/__init__.py +1 -0
  978. teradataml/opensource/sklearn/_class.py +255 -0
  979. teradataml/opensource/sklearn/_sklearn_wrapper.py +1715 -0
  980. teradataml/opensource/sklearn/_wrapper_utils.py +268 -0
  981. teradataml/opensource/sklearn/constants.py +54 -0
  982. teradataml/options/__init__.py +130 -124
  983. teradataml/options/configure.py +358 -336
  984. teradataml/options/display.py +176 -176
  985. teradataml/plot/__init__.py +2 -2
  986. teradataml/plot/axis.py +1388 -1388
  987. teradataml/plot/constants.py +15 -15
  988. teradataml/plot/figure.py +398 -398
  989. teradataml/plot/plot.py +760 -760
  990. teradataml/plot/query_generator.py +83 -83
  991. teradataml/plot/subplot.py +216 -216
  992. teradataml/scriptmgmt/UserEnv.py +3791 -3761
  993. teradataml/scriptmgmt/__init__.py +3 -3
  994. teradataml/scriptmgmt/lls_utils.py +1719 -1604
  995. teradataml/series/series.py +532 -532
  996. teradataml/series/series_utils.py +71 -71
  997. teradataml/table_operators/Apply.py +949 -917
  998. teradataml/table_operators/Script.py +1718 -1982
  999. teradataml/table_operators/TableOperator.py +1255 -1616
  1000. teradataml/table_operators/__init__.py +2 -3
  1001. teradataml/table_operators/apply_query_generator.py +262 -262
  1002. teradataml/table_operators/query_generator.py +507 -507
  1003. teradataml/table_operators/table_operator_query_generator.py +460 -460
  1004. teradataml/table_operators/table_operator_util.py +631 -639
  1005. teradataml/table_operators/templates/dataframe_apply.template +184 -184
  1006. teradataml/table_operators/templates/dataframe_map.template +176 -176
  1007. teradataml/table_operators/templates/script_executor.template +170 -170
  1008. teradataml/utils/dtypes.py +684 -684
  1009. teradataml/utils/internal_buffer.py +84 -84
  1010. teradataml/utils/print_versions.py +205 -205
  1011. teradataml/utils/utils.py +410 -410
  1012. teradataml/utils/validators.py +2277 -2115
  1013. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/METADATA +346 -45
  1014. teradataml-20.0.0.1.dist-info/RECORD +1056 -0
  1015. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/WHEEL +1 -1
  1016. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/zip-safe +1 -1
  1017. teradataml/analytics/mle/AdaBoost.py +0 -651
  1018. teradataml/analytics/mle/AdaBoostPredict.py +0 -564
  1019. teradataml/analytics/mle/Antiselect.py +0 -342
  1020. teradataml/analytics/mle/Arima.py +0 -641
  1021. teradataml/analytics/mle/ArimaPredict.py +0 -477
  1022. teradataml/analytics/mle/Attribution.py +0 -1070
  1023. teradataml/analytics/mle/Betweenness.py +0 -658
  1024. teradataml/analytics/mle/Burst.py +0 -711
  1025. teradataml/analytics/mle/CCM.py +0 -600
  1026. teradataml/analytics/mle/CCMPrepare.py +0 -324
  1027. teradataml/analytics/mle/CFilter.py +0 -460
  1028. teradataml/analytics/mle/ChangePointDetection.py +0 -572
  1029. teradataml/analytics/mle/ChangePointDetectionRT.py +0 -477
  1030. teradataml/analytics/mle/Closeness.py +0 -737
  1031. teradataml/analytics/mle/ConfusionMatrix.py +0 -420
  1032. teradataml/analytics/mle/Correlation.py +0 -477
  1033. teradataml/analytics/mle/Correlation2.py +0 -573
  1034. teradataml/analytics/mle/CoxHazardRatio.py +0 -679
  1035. teradataml/analytics/mle/CoxPH.py +0 -556
  1036. teradataml/analytics/mle/CoxSurvival.py +0 -478
  1037. teradataml/analytics/mle/CumulativeMovAvg.py +0 -363
  1038. teradataml/analytics/mle/DTW.py +0 -623
  1039. teradataml/analytics/mle/DWT.py +0 -564
  1040. teradataml/analytics/mle/DWT2D.py +0 -599
  1041. teradataml/analytics/mle/DecisionForest.py +0 -716
  1042. teradataml/analytics/mle/DecisionForestEvaluator.py +0 -363
  1043. teradataml/analytics/mle/DecisionForestPredict.py +0 -561
  1044. teradataml/analytics/mle/DecisionTree.py +0 -830
  1045. teradataml/analytics/mle/DecisionTreePredict.py +0 -528
  1046. teradataml/analytics/mle/ExponentialMovAvg.py +0 -418
  1047. teradataml/analytics/mle/FMeasure.py +0 -402
  1048. teradataml/analytics/mle/FPGrowth.py +0 -734
  1049. teradataml/analytics/mle/FrequentPaths.py +0 -695
  1050. teradataml/analytics/mle/GLM.py +0 -558
  1051. teradataml/analytics/mle/GLML1L2.py +0 -547
  1052. teradataml/analytics/mle/GLML1L2Predict.py +0 -519
  1053. teradataml/analytics/mle/GLMPredict.py +0 -529
  1054. teradataml/analytics/mle/HMMDecoder.py +0 -945
  1055. teradataml/analytics/mle/HMMEvaluator.py +0 -901
  1056. teradataml/analytics/mle/HMMSupervised.py +0 -521
  1057. teradataml/analytics/mle/HMMUnsupervised.py +0 -572
  1058. teradataml/analytics/mle/Histogram.py +0 -561
  1059. teradataml/analytics/mle/IDWT.py +0 -476
  1060. teradataml/analytics/mle/IDWT2D.py +0 -493
  1061. teradataml/analytics/mle/IdentityMatch.py +0 -763
  1062. teradataml/analytics/mle/Interpolator.py +0 -918
  1063. teradataml/analytics/mle/KMeans.py +0 -485
  1064. teradataml/analytics/mle/KNN.py +0 -627
  1065. teradataml/analytics/mle/KNNRecommender.py +0 -488
  1066. teradataml/analytics/mle/KNNRecommenderPredict.py +0 -581
  1067. teradataml/analytics/mle/LAR.py +0 -439
  1068. teradataml/analytics/mle/LARPredict.py +0 -478
  1069. teradataml/analytics/mle/LDA.py +0 -548
  1070. teradataml/analytics/mle/LDAInference.py +0 -492
  1071. teradataml/analytics/mle/LDATopicSummary.py +0 -464
  1072. teradataml/analytics/mle/LevenshteinDistance.py +0 -450
  1073. teradataml/analytics/mle/LinReg.py +0 -433
  1074. teradataml/analytics/mle/LinRegPredict.py +0 -438
  1075. teradataml/analytics/mle/MinHash.py +0 -544
  1076. teradataml/analytics/mle/Modularity.py +0 -587
  1077. teradataml/analytics/mle/NEREvaluator.py +0 -410
  1078. teradataml/analytics/mle/NERExtractor.py +0 -595
  1079. teradataml/analytics/mle/NERTrainer.py +0 -458
  1080. teradataml/analytics/mle/NGrams.py +0 -570
  1081. teradataml/analytics/mle/NPath.py +0 -634
  1082. teradataml/analytics/mle/NTree.py +0 -549
  1083. teradataml/analytics/mle/NaiveBayes.py +0 -462
  1084. teradataml/analytics/mle/NaiveBayesPredict.py +0 -513
  1085. teradataml/analytics/mle/NaiveBayesTextClassifier.py +0 -607
  1086. teradataml/analytics/mle/NaiveBayesTextClassifier2.py +0 -531
  1087. teradataml/analytics/mle/NaiveBayesTextClassifierPredict.py +0 -799
  1088. teradataml/analytics/mle/NamedEntityFinder.py +0 -529
  1089. teradataml/analytics/mle/NamedEntityFinderEvaluator.py +0 -414
  1090. teradataml/analytics/mle/NamedEntityFinderTrainer.py +0 -396
  1091. teradataml/analytics/mle/POSTagger.py +0 -417
  1092. teradataml/analytics/mle/Pack.py +0 -411
  1093. teradataml/analytics/mle/PageRank.py +0 -535
  1094. teradataml/analytics/mle/PathAnalyzer.py +0 -426
  1095. teradataml/analytics/mle/PathGenerator.py +0 -367
  1096. teradataml/analytics/mle/PathStart.py +0 -464
  1097. teradataml/analytics/mle/PathSummarizer.py +0 -470
  1098. teradataml/analytics/mle/Pivot.py +0 -471
  1099. teradataml/analytics/mle/ROC.py +0 -425
  1100. teradataml/analytics/mle/RandomSample.py +0 -637
  1101. teradataml/analytics/mle/RandomWalkSample.py +0 -490
  1102. teradataml/analytics/mle/SAX.py +0 -779
  1103. teradataml/analytics/mle/SVMDense.py +0 -677
  1104. teradataml/analytics/mle/SVMDensePredict.py +0 -536
  1105. teradataml/analytics/mle/SVMDenseSummary.py +0 -437
  1106. teradataml/analytics/mle/SVMSparse.py +0 -557
  1107. teradataml/analytics/mle/SVMSparsePredict.py +0 -553
  1108. teradataml/analytics/mle/SVMSparseSummary.py +0 -435
  1109. teradataml/analytics/mle/Sampling.py +0 -549
  1110. teradataml/analytics/mle/Scale.py +0 -565
  1111. teradataml/analytics/mle/ScaleByPartition.py +0 -496
  1112. teradataml/analytics/mle/ScaleMap.py +0 -378
  1113. teradataml/analytics/mle/ScaleSummary.py +0 -320
  1114. teradataml/analytics/mle/SentenceExtractor.py +0 -363
  1115. teradataml/analytics/mle/SentimentEvaluator.py +0 -432
  1116. teradataml/analytics/mle/SentimentExtractor.py +0 -578
  1117. teradataml/analytics/mle/SentimentTrainer.py +0 -405
  1118. teradataml/analytics/mle/SeriesSplitter.py +0 -641
  1119. teradataml/analytics/mle/Sessionize.py +0 -475
  1120. teradataml/analytics/mle/SimpleMovAvg.py +0 -397
  1121. teradataml/analytics/mle/StringSimilarity.py +0 -425
  1122. teradataml/analytics/mle/TF.py +0 -389
  1123. teradataml/analytics/mle/TFIDF.py +0 -504
  1124. teradataml/analytics/mle/TextChunker.py +0 -414
  1125. teradataml/analytics/mle/TextClassifier.py +0 -399
  1126. teradataml/analytics/mle/TextClassifierEvaluator.py +0 -413
  1127. teradataml/analytics/mle/TextClassifierTrainer.py +0 -565
  1128. teradataml/analytics/mle/TextMorph.py +0 -494
  1129. teradataml/analytics/mle/TextParser.py +0 -623
  1130. teradataml/analytics/mle/TextTagger.py +0 -530
  1131. teradataml/analytics/mle/TextTokenizer.py +0 -502
  1132. teradataml/analytics/mle/UnivariateStatistics.py +0 -488
  1133. teradataml/analytics/mle/Unpack.py +0 -526
  1134. teradataml/analytics/mle/Unpivot.py +0 -438
  1135. teradataml/analytics/mle/VarMax.py +0 -776
  1136. teradataml/analytics/mle/VectorDistance.py +0 -762
  1137. teradataml/analytics/mle/WeightedMovAvg.py +0 -400
  1138. teradataml/analytics/mle/XGBoost.py +0 -842
  1139. teradataml/analytics/mle/XGBoostPredict.py +0 -627
  1140. teradataml/analytics/mle/__init__.py +0 -123
  1141. teradataml/analytics/mle/json/adaboost_mle.json +0 -135
  1142. teradataml/analytics/mle/json/adaboostpredict_mle.json +0 -85
  1143. teradataml/analytics/mle/json/antiselect_mle.json +0 -34
  1144. teradataml/analytics/mle/json/antiselect_mle_mle.json +0 -34
  1145. teradataml/analytics/mle/json/arima_mle.json +0 -172
  1146. teradataml/analytics/mle/json/arimapredict_mle.json +0 -52
  1147. teradataml/analytics/mle/json/attribution_mle_mle.json +0 -143
  1148. teradataml/analytics/mle/json/betweenness_mle.json +0 -97
  1149. teradataml/analytics/mle/json/burst_mle.json +0 -140
  1150. teradataml/analytics/mle/json/ccm_mle.json +0 -124
  1151. teradataml/analytics/mle/json/ccmprepare_mle.json +0 -14
  1152. teradataml/analytics/mle/json/cfilter_mle.json +0 -93
  1153. teradataml/analytics/mle/json/changepointdetection_mle.json +0 -92
  1154. teradataml/analytics/mle/json/changepointdetectionrt_mle.json +0 -78
  1155. teradataml/analytics/mle/json/closeness_mle.json +0 -104
  1156. teradataml/analytics/mle/json/confusionmatrix_mle.json +0 -79
  1157. teradataml/analytics/mle/json/correlation_mle.json +0 -86
  1158. teradataml/analytics/mle/json/correlationreduce_mle.json +0 -49
  1159. teradataml/analytics/mle/json/coxhazardratio_mle.json +0 -89
  1160. teradataml/analytics/mle/json/coxph_mle.json +0 -98
  1161. teradataml/analytics/mle/json/coxsurvival_mle.json +0 -79
  1162. teradataml/analytics/mle/json/cumulativemovavg_mle.json +0 -34
  1163. teradataml/analytics/mle/json/decisionforest_mle.json +0 -167
  1164. teradataml/analytics/mle/json/decisionforestevaluator_mle.json +0 -33
  1165. teradataml/analytics/mle/json/decisionforestpredict_mle_mle.json +0 -74
  1166. teradataml/analytics/mle/json/decisiontree_mle.json +0 -194
  1167. teradataml/analytics/mle/json/decisiontreepredict_mle_mle.json +0 -86
  1168. teradataml/analytics/mle/json/dtw_mle.json +0 -97
  1169. teradataml/analytics/mle/json/dwt2d_mle.json +0 -116
  1170. teradataml/analytics/mle/json/dwt_mle.json +0 -101
  1171. teradataml/analytics/mle/json/exponentialmovavg_mle.json +0 -55
  1172. teradataml/analytics/mle/json/fmeasure_mle.json +0 -58
  1173. teradataml/analytics/mle/json/fpgrowth_mle.json +0 -159
  1174. teradataml/analytics/mle/json/frequentpaths_mle.json +0 -129
  1175. teradataml/analytics/mle/json/glm_mle.json +0 -111
  1176. teradataml/analytics/mle/json/glml1l2_mle.json +0 -106
  1177. teradataml/analytics/mle/json/glml1l2predict_mle.json +0 -57
  1178. teradataml/analytics/mle/json/glmpredict_mle_mle.json +0 -74
  1179. teradataml/analytics/mle/json/histogram_mle.json +0 -100
  1180. teradataml/analytics/mle/json/hmmdecoder_mle.json +0 -192
  1181. teradataml/analytics/mle/json/hmmevaluator_mle.json +0 -206
  1182. teradataml/analytics/mle/json/hmmsupervised_mle.json +0 -91
  1183. teradataml/analytics/mle/json/hmmunsupervised_mle.json +0 -114
  1184. teradataml/analytics/mle/json/identitymatch_mle.json +0 -88
  1185. teradataml/analytics/mle/json/idwt2d_mle.json +0 -73
  1186. teradataml/analytics/mle/json/idwt_mle.json +0 -66
  1187. teradataml/analytics/mle/json/interpolator_mle.json +0 -151
  1188. teradataml/analytics/mle/json/kmeans_mle.json +0 -97
  1189. teradataml/analytics/mle/json/knn_mle.json +0 -141
  1190. teradataml/analytics/mle/json/knnrecommender_mle.json +0 -111
  1191. teradataml/analytics/mle/json/knnrecommenderpredict_mle.json +0 -75
  1192. teradataml/analytics/mle/json/lar_mle.json +0 -78
  1193. teradataml/analytics/mle/json/larpredict_mle.json +0 -69
  1194. teradataml/analytics/mle/json/lda_mle.json +0 -130
  1195. teradataml/analytics/mle/json/ldainference_mle.json +0 -78
  1196. teradataml/analytics/mle/json/ldatopicsummary_mle.json +0 -64
  1197. teradataml/analytics/mle/json/levenshteindistance_mle.json +0 -92
  1198. teradataml/analytics/mle/json/linreg_mle.json +0 -42
  1199. teradataml/analytics/mle/json/linregpredict_mle.json +0 -56
  1200. teradataml/analytics/mle/json/minhash_mle.json +0 -113
  1201. teradataml/analytics/mle/json/modularity_mle.json +0 -91
  1202. teradataml/analytics/mle/json/naivebayespredict_mle_mle.json +0 -85
  1203. teradataml/analytics/mle/json/naivebayesreduce_mle.json +0 -52
  1204. teradataml/analytics/mle/json/naivebayestextclassifierpredict_mle_mle.json +0 -147
  1205. teradataml/analytics/mle/json/naivebayestextclassifiertrainer2_mle.json +0 -108
  1206. teradataml/analytics/mle/json/naivebayestextclassifiertrainer_mle.json +0 -102
  1207. teradataml/analytics/mle/json/namedentityfinder_mle.json +0 -84
  1208. teradataml/analytics/mle/json/namedentityfinderevaluatorreduce_mle.json +0 -43
  1209. teradataml/analytics/mle/json/namedentityfindertrainer_mle.json +0 -64
  1210. teradataml/analytics/mle/json/nerevaluator_mle.json +0 -54
  1211. teradataml/analytics/mle/json/nerextractor_mle.json +0 -87
  1212. teradataml/analytics/mle/json/nertrainer_mle.json +0 -89
  1213. teradataml/analytics/mle/json/ngrams_mle.json +0 -137
  1214. teradataml/analytics/mle/json/ngramsplitter_mle_mle.json +0 -137
  1215. teradataml/analytics/mle/json/npath@coprocessor_mle.json +0 -73
  1216. teradataml/analytics/mle/json/ntree@coprocessor_mle.json +0 -123
  1217. teradataml/analytics/mle/json/pack_mle.json +0 -58
  1218. teradataml/analytics/mle/json/pack_mle_mle.json +0 -58
  1219. teradataml/analytics/mle/json/pagerank_mle.json +0 -81
  1220. teradataml/analytics/mle/json/pathanalyzer_mle.json +0 -63
  1221. teradataml/analytics/mle/json/pathgenerator_mle.json +0 -40
  1222. teradataml/analytics/mle/json/pathstart_mle.json +0 -62
  1223. teradataml/analytics/mle/json/pathsummarizer_mle.json +0 -72
  1224. teradataml/analytics/mle/json/pivoting_mle.json +0 -71
  1225. teradataml/analytics/mle/json/postagger_mle.json +0 -51
  1226. teradataml/analytics/mle/json/randomsample_mle.json +0 -131
  1227. teradataml/analytics/mle/json/randomwalksample_mle.json +0 -85
  1228. teradataml/analytics/mle/json/roc_mle.json +0 -73
  1229. teradataml/analytics/mle/json/sampling_mle.json +0 -75
  1230. teradataml/analytics/mle/json/sax_mle.json +0 -154
  1231. teradataml/analytics/mle/json/scale_mle.json +0 -93
  1232. teradataml/analytics/mle/json/scalebypartition_mle.json +0 -89
  1233. teradataml/analytics/mle/json/scalemap_mle.json +0 -44
  1234. teradataml/analytics/mle/json/scalesummary_mle.json +0 -14
  1235. teradataml/analytics/mle/json/sentenceextractor_mle.json +0 -41
  1236. teradataml/analytics/mle/json/sentimentevaluator_mle.json +0 -43
  1237. teradataml/analytics/mle/json/sentimentextractor_mle.json +0 -100
  1238. teradataml/analytics/mle/json/sentimenttrainer_mle.json +0 -68
  1239. teradataml/analytics/mle/json/seriessplitter_mle.json +0 -133
  1240. teradataml/analytics/mle/json/sessionize_mle_mle.json +0 -62
  1241. teradataml/analytics/mle/json/simplemovavg_mle.json +0 -48
  1242. teradataml/analytics/mle/json/stringsimilarity_mle.json +0 -50
  1243. teradataml/analytics/mle/json/stringsimilarity_mle_mle.json +0 -50
  1244. teradataml/analytics/mle/json/svmdense_mle.json +0 -165
  1245. teradataml/analytics/mle/json/svmdensepredict_mle.json +0 -95
  1246. teradataml/analytics/mle/json/svmdensesummary_mle.json +0 -58
  1247. teradataml/analytics/mle/json/svmsparse_mle.json +0 -148
  1248. teradataml/analytics/mle/json/svmsparsepredict_mle_mle.json +0 -103
  1249. teradataml/analytics/mle/json/svmsparsesummary_mle.json +0 -57
  1250. teradataml/analytics/mle/json/textchunker_mle.json +0 -40
  1251. teradataml/analytics/mle/json/textclassifier_mle.json +0 -51
  1252. teradataml/analytics/mle/json/textclassifierevaluator_mle.json +0 -43
  1253. teradataml/analytics/mle/json/textclassifiertrainer_mle.json +0 -103
  1254. teradataml/analytics/mle/json/textmorph_mle.json +0 -63
  1255. teradataml/analytics/mle/json/textparser_mle.json +0 -166
  1256. teradataml/analytics/mle/json/texttagger_mle.json +0 -81
  1257. teradataml/analytics/mle/json/texttokenizer_mle.json +0 -91
  1258. teradataml/analytics/mle/json/tf_mle.json +0 -33
  1259. teradataml/analytics/mle/json/tfidf_mle.json +0 -34
  1260. teradataml/analytics/mle/json/univariatestatistics_mle.json +0 -81
  1261. teradataml/analytics/mle/json/unpack_mle.json +0 -91
  1262. teradataml/analytics/mle/json/unpack_mle_mle.json +0 -91
  1263. teradataml/analytics/mle/json/unpivoting_mle.json +0 -63
  1264. teradataml/analytics/mle/json/varmax_mle.json +0 -176
  1265. teradataml/analytics/mle/json/vectordistance_mle.json +0 -179
  1266. teradataml/analytics/mle/json/weightedmovavg_mle.json +0 -48
  1267. teradataml/analytics/mle/json/xgboost_mle.json +0 -178
  1268. teradataml/analytics/mle/json/xgboostpredict_mle.json +0 -104
  1269. teradataml/analytics/sqle/Antiselect.py +0 -321
  1270. teradataml/analytics/sqle/Attribution.py +0 -603
  1271. teradataml/analytics/sqle/DecisionForestPredict.py +0 -408
  1272. teradataml/analytics/sqle/GLMPredict.py +0 -430
  1273. teradataml/analytics/sqle/MovingAverage.py +0 -543
  1274. teradataml/analytics/sqle/NGramSplitter.py +0 -548
  1275. teradataml/analytics/sqle/NPath.py +0 -632
  1276. teradataml/analytics/sqle/NaiveBayesTextClassifierPredict.py +0 -515
  1277. teradataml/analytics/sqle/Pack.py +0 -388
  1278. teradataml/analytics/sqle/SVMSparsePredict.py +0 -464
  1279. teradataml/analytics/sqle/Sessionize.py +0 -390
  1280. teradataml/analytics/sqle/StringSimilarity.py +0 -400
  1281. teradataml/analytics/sqle/Unpack.py +0 -503
  1282. teradataml/analytics/sqle/json/antiselect_sqle.json +0 -21
  1283. teradataml/analytics/sqle/json/attribution_sqle.json +0 -92
  1284. teradataml/analytics/sqle/json/decisionforestpredict_sqle.json +0 -48
  1285. teradataml/analytics/sqle/json/glmpredict_sqle.json +0 -48
  1286. teradataml/analytics/sqle/json/h2opredict_sqle.json +0 -63
  1287. teradataml/analytics/sqle/json/movingaverage_sqle.json +0 -58
  1288. teradataml/analytics/sqle/json/naivebayestextclassifierpredict_sqle.json +0 -76
  1289. teradataml/analytics/sqle/json/ngramsplitter_sqle.json +0 -126
  1290. teradataml/analytics/sqle/json/npath_sqle.json +0 -67
  1291. teradataml/analytics/sqle/json/pack_sqle.json +0 -47
  1292. teradataml/analytics/sqle/json/pmmlpredict_sqle.json +0 -55
  1293. teradataml/analytics/sqle/json/sessionize_sqle.json +0 -43
  1294. teradataml/analytics/sqle/json/stringsimilarity_sqle.json +0 -39
  1295. teradataml/analytics/sqle/json/svmsparsepredict_sqle.json +0 -74
  1296. teradataml/analytics/sqle/json/unpack_sqle.json +0 -80
  1297. teradataml/catalog/model_cataloging.py +0 -980
  1298. teradataml/config/mlengine_alias_definitions_v1.0 +0 -118
  1299. teradataml/config/mlengine_alias_definitions_v1.1 +0 -127
  1300. teradataml/config/mlengine_alias_definitions_v1.3 +0 -129
  1301. teradataml/table_operators/sandbox_container_util.py +0 -643
  1302. teradataml-17.20.0.7.dist-info/RECORD +0 -1280
  1303. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.1.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1715 @@
+ # ##################################################################
+ #
+ # Copyright 2023 Teradata. All rights reserved.
+ # TERADATA CONFIDENTIAL AND TRADE SECRET
+ #
+ # Primary Owner: Adithya Avvaru (adithya.avvaru@teradata.com)
+ # Secondary Owner: Pankaj Purandare (pankajvinod.purandare@teradata.com)
+ #
+ # Version: 1.0
+ # Function Version: 1.0
+ #
+ # This file contains the object wrapper class for open-source packages and a child
+ # object wrapper class for each open-source package. Currently, there is a child
+ # object wrapper class for scikit-learn.
+ #
+ # ##################################################################
+
+ from collections import OrderedDict, defaultdict
+ from importlib import import_module
+
+ import base64
+ import functools
+ import json
+ import numpy
+ import os
+ import pickle
+ import time
+ import inspect
+ import warnings
+ import random
+ import pandas as pd
+ from teradatasqlalchemy import BLOB, CLOB, FLOAT, TIMESTAMP, VARCHAR, INTEGER
+ import pandas.api.types as pt
+
+ from teradataml import _TDML_DIRECTORY, Script, TeradataMlException, Apply
+ from teradataml.dataframe.copy_to import _get_sqlalchemy_mapping
+ from teradataml.common import pylogger
+ from teradataml.common.utils import UtilFuncs
+ from teradataml.context.context import _get_current_databasename, get_connection
+ from teradataml.dbutils.filemgr import install_file, remove_file
+ from teradataml.utils.utils import execute_sql
+ from teradataml.options.configure import configure
+ from teradataml.opensource.sklearn._wrapper_utils import _validate_fit_run, _generate_new_name,\
+     _validate_opensource_func_args, _derive_df_and_required_columns, _validate_df_query_type
+ from teradataml.opensource.sklearn.constants import OpenSourcePackage, _OSML_MODELS_PRIMARY_INDEX,\
+     _OSML_MODELS_TABLE_NAME, _OSML_MODELS_TABLE_COLUMNS_TYPE_DICT, OpensourceModels,\
+     _OSML_ADDITIONAL_COLUMN_TYPES
+ from teradataml.common.messagecodes import MessageCodes
+ from teradataml.common.messages import Messages
+ from teradataml.catalog.byom import save_byom, retrieve_byom, delete_byom
+ from teradataml.dbutils.dbutils import _create_table
+ from teradataml.utils.validators import _Validators
+ from teradataml.dataframe.dataframe import DataFrame
+ from teradataml.dataframe.dataframe_utils import DataFrameUtils
+ from teradataml.scriptmgmt.lls_utils import create_env, get_env
+ from teradataml.common.garbagecollector import GarbageCollector
+ from teradataml.common.constants import TeradataConstants
+
+
+ logger = pylogger.getLogger()
+
+ validator = _Validators()
+
+ # Tracks model files already installed in Vantage so each one is registered with
+ # the garbage collector only once.
+ installed_model_files = defaultdict(int)
+
+ class _GenericObjectWrapper:
+     def __init__(self) -> None:
+         self._db_name = _get_current_databasename()
+
+         self._scripts_path = os.path.join(_TDML_DIRECTORY, "data", "scripts", "sklearn")
+
+         # Some random number to be used as partition value if partition_columns is None for fit().
+         self._default_data_partition_value = -1001
+
+         self.modelObj = None
+         self._model_data = None
+
+         self._tdml_tmp_dir = GarbageCollector._get_temp_dir_name()
+
+         self._env = None
+
+         self._is_lake_system = UtilFuncs._is_lake()
+
+         if self._is_lake_system:
+             if configure.openml_user_env is not None:
+                 self._env = configure.openml_user_env
+             else:
+                 self._create_or_get_env()
+         else:
+             execute_sql(f"SET SESSION SEARCHUIFDBPATH = {self._db_name};")
+
+     def _create_or_get_env(self):
+         """
+         Internal function to return the user environment if it already exists;
+         otherwise creates the environment from the template file and returns it.
+         """
+         # Get the template file path.
+         template_dir_path = os.path.join(_TDML_DIRECTORY, "data", "templates",
+                                          "open_source_ml.json")
+
+         # Read template file.
+         with open(template_dir_path, "r") as r_file:
+             data = json.load(r_file)
+
+         # Get env_name.
+         _env_name = data["env_specs"][0]["env_name"]
+
+         try:
+             # Try to fetch the existing environment by name.
+             self._env = get_env(_env_name)
+         except TeradataMlException as tdml_e:
+             # We get here when the error says the env does not exist; otherwise the
+             # exception is raised as is. The env does not exist, so create one.
+             exc_msg = "Failed to execute get_env(). User environment '{}' not " \
+                       "found.".format(_env_name)
+             if exc_msg in tdml_e.args[0]:
+                 print(f"No OpenAF environment with name '{_env_name}' found. Creating one with "\
+                       "latest supported python and required packages.")
+                 self._env = create_env(template=template_dir_path)
+             else:
+                 raise tdml_e
+         except Exception as exc:
+             raise exc
+
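The open_source_ml.json template read here is only consulted for its env_name before being handed to create_env(). A hedged sketch of the structure that lookup implies (field values beyond env_specs[0].env_name are illustrative assumptions, not the shipped file):

    import json

    # Hypothetical template contents; only data["env_specs"][0]["env_name"] is
    # relied upon by _create_or_get_env() above.
    template = '''{"env_specs": [{"env_name": "openml_env",
                                  "base_env": "python_3.10",
                                  "libs": ["scikit-learn", "pandas"]}]}'''
    data = json.loads(template)
    print(data["env_specs"][0]["env_name"])   # openml_env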
+     def _get_columns_as_list(self, cols):
+         """
+         Internal function to get columns as a list of strings.
+         An empty list is returned if cols is None.
+         """
+         if cols is None:
+             return []
+         if not isinstance(cols, (list, tuple)):
+             return [cols]
+         return cols
+
+     def _get_data_and_data_partition_columns(self, data, feature_columns, label_columns,
+                                              partition_columns=None, group_columns=[]):
+         """
+         Internal function to generate a new partition column (if none is provided) and
+         return the data along with the partition columns (either generated or passed in).
+         """
+         new_partition_columns = self._get_columns_as_list(partition_columns)
+
+         if not partition_columns:
+             # If partition column is not specified, create a partition column and run Script.
+             # This runs the Script on one AMP, as we are partitioning data using this column,
+             # which contains only one value.
+             new_partition_columns = [_generate_new_name(type="column")]
+             data = data.assign(**{new_partition_columns[0]: self._default_data_partition_value})
+
+         # Keep only the partition columns that are not already among the feature,
+         # label and group columns, to avoid selecting duplicates.
+         new_partition_columns_filtered = [col for col in new_partition_columns
+                                           if col not in (feature_columns + label_columns + group_columns)]
+
+         all_columns = feature_columns + label_columns + group_columns + new_partition_columns_filtered
+         return data.select(all_columns), new_partition_columns
+
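A minimal standalone sketch of the partition-column bookkeeping above, reduced to plain lists (column names are hypothetical; the real method operates on a teradataml DataFrame via assign()/select()):

    def plan_columns(feature_cols, label_cols, partition_cols=None):
        # Without user-supplied partition columns, one synthetic constant-valued
        # column is generated, which lands all rows in a single partition (one AMP).
        generated = partition_cols or ["td_opensource_part_col"]
        filtered = [c for c in generated if c not in feature_cols + label_cols]
        return feature_cols + label_cols + filtered, generated

    print(plan_columns(["sepal_length"], ["species"]))
    # (['sepal_length', 'species', 'td_opensource_part_col'], ['td_opensource_part_col'])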
+     def _run_script(self, data, command, partition_columns, return_types):
+         """
+         Internal function to run Script(), given the arguments needed by Script (STO)
+         on Vantage Enterprise or Apply on Vantage Lake.
+         """
+         if isinstance(partition_columns, list) and len(partition_columns) == 0:
+             partition_columns = None
+
+         if self._is_lake_system:
+             obj = Apply(data=data,
+                         returns=OrderedDict(return_types),
+                         apply_command=command,
+                         data_partition_column=partition_columns,
+                         env_name=self._env,
+                         delimiter="\t")
+         else:
+             obj = Script(data=data,
+                          returns=OrderedDict(return_types),
+                          script_command=command,
+                          data_partition_column=partition_columns)
+             obj.check_reserved_keyword = False
+
+         obj.skip_argument_validation = True
+         return obj.execute_script(output_style="TABLE")
+
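The return_types threaded through here is a list of (column, teradatasqlalchemy type) pairs that becomes the output schema of the scripted run; a small sketch of building one (column names are illustrative):

    from collections import OrderedDict
    from teradatasqlalchemy import INTEGER, CLOB

    return_types = [("partition_id", INTEGER()), ("model", CLOB())]
    print(OrderedDict(return_types))   # ordered column -> type mapping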
+     def _install_script_file(self,
+                              file_identifier=None,
+                              file_name=None,
+                              is_binary=False,
+                              file_location=None):
+         """
+         Internal function to install a script file in Vantage.
+         """
+         if file_location is None:
+             file_location = self._scripts_path
+         new_script = os.path.join(file_location, file_name)
+
+         # "_env" is set during object creation. If it is not set, the system is
+         # Vantage Enterprise; otherwise, it is Vantage Lake.
+         if not self._is_lake_system:
+             status = install_file(file_identifier=file_identifier,
+                                   file_path=new_script,
+                                   replace=True,
+                                   suppress_output=True,
+                                   is_binary=is_binary)
+         else:
+             status = self._env.install_file(file_path=new_script,
+                                             replace=True,
+                                             suppress_output=True)
+         if not status:
+             raise TeradataMlException(
+                 f"Script file '{file_name}' failed to get installed/replaced in Vantage."
+             )
+
+     def _get_data_col_types_and_partition_col_indices_and_types(self, data, partition_columns,
+                                                                 idx_delim=",",
+                                                                 types_delim="--"):
+         """
+         Internal function to get the data column types and the partition column names,
+         indices and types. The function returns delimiter-separated strings of types and
+         indices if idx_delim and types_delim are provided; otherwise it returns lists of
+         types and indices. Partition column names are always returned as a list.
+         """
+         data_column_types = "" if types_delim else []
+         partition_indices = "" if idx_delim else []
+         partition_types = "" if types_delim else []
+         new_partition_columns = []
+         j = 0
+         for i, col in enumerate(data.columns):
+             _type = data._td_column_names_and_sqlalchemy_types[col.lower()].python_type.__name__
+             if types_delim:
+                 data_column_types += (_type if i == 0 else f"{types_delim}{_type}")
+             else:
+                 data_column_types.append(_type)
+             if col in partition_columns:
+                 new_partition_columns.append(col)
+                 if idx_delim:
+                     partition_indices += (str(i) if j == 0 else f"{idx_delim}{str(i)}")
+                 else:
+                     partition_indices.append(i)
+                 if types_delim:
+                     partition_types += (_type if j == 0 else f"{types_delim}{_type}")
+                 else:
+                     partition_types.append(_type)
+                 j += 1
+         # Return types of all columns (as list or str), partition column indices (as list
+         # or str) and partition column types (as list or str).
+         return data_column_types, partition_indices, partition_types, new_partition_columns
+
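A worked example of the delimiter encoding this helper produces, mimicked over plain (name, python-type-name) pairs for a hypothetical table (id, x, y) partitioned by id:

    def encode(cols, partition_cols, idx_delim=",", types_delim="--"):
        types = types_delim.join(t for _, t in cols)
        indices = idx_delim.join(str(i) for i, (n, _) in enumerate(cols) if n in partition_cols)
        p_types = types_delim.join(t for n, t in cols if n in partition_cols)
        return types, indices, p_types

    print(encode([("id", "int"), ("x", "float"), ("y", "float")], ["id"]))
    # ('int--float--float', '0', 'int')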
+     def _get_kwargs_str(self, kwargs):
+         """
+         Returns a string of kwargs in the format:
+             key1 val1-type1 key2 val2-type2 ...
+         """
+         return " ".join(f"{key} {str(val)}-{type(val).__name__}"
+                         for key, val in kwargs.items())
+
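For example, the serialization format produced above, with hypothetical estimator arguments:

    kwargs = {"n_estimators": 10, "criterion": "gini"}
    encoded = " ".join(f"{k} {v}-{type(v).__name__}" for k, v in kwargs.items())
    print(encoded)   # n_estimators 10-int criterion gini-str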
+     def extract_sklearn_obj(self, n_unique_partitions=1, n_partition_cols=1):
+         """
+         Internal function to extract the sklearn object from the model(s) depending on
+         the number of partitions. When there is only one model, it is used directly as
+         the sklearn object (modelObj). When there are multiple models, they are converted
+         to a pandas DataFrame and stored as the sklearn object.
+         """
+         vals = execute_sql("select * from {}".format(self._model_data._table_name)).fetchall()
+
+         # pickle will issue a caution warning if model pickling was done with a
+         # different library version than used here. The following disables any warnings
+         # that might otherwise show in the scriptlog files on the Advanced SQL Engine
+         # nodes in this case. Yet, do keep an eye out for incompatible pickle versions.
+         warnings.filterwarnings("ignore")
+
+         model_obj = None
+         # Extract and unpickle the last column, which is the model object.
+         for i, row in enumerate(vals):
+             if self._is_lake_system:
+                 model_obj = pickle.loads(row[n_partition_cols])
+             else:
+                 model_obj = pickle.loads(base64.b64decode(row[n_partition_cols].partition("'")[2]))
+             row[n_partition_cols] = model_obj
+             vals[i] = row
+         if n_unique_partitions == 1:
+             self.modelObj = model_obj
+         elif n_unique_partitions > 1:
+             self.modelObj = pd.DataFrame(vals, columns=self._model_data.columns)
+         else:
+             raise ValueError("Number of partitions should be greater than 0.")
+
+         warnings.filterwarnings("default")
+
+
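A minimal round-trip sketch of the Enterprise-side decoding above (the payload text is hypothetical; the CLOB's quoted portion holds base64-encoded pickle, which is why partition("'")[2] is taken before b64decode, and b64decode's default leniency discards the trailing quote):

    import base64, pickle

    model = {"coef_": [0.5, 1.5]}     # stand-in for a fitted sklearn estimator
    payload = "b'" + base64.b64encode(pickle.dumps(model)).decode() + "'"
    decoded = pickle.loads(base64.b64decode(payload.partition("'")[2]))
    print(decoded)                    # {'coef_': [0.5, 1.5]}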
+ class _OpenSourceObjectWrapper(_GenericObjectWrapper):
+     # This has to be set for every package which subclasses this class.
+     OPENSOURCE_PACKAGE_NAME = None
+
+     def __init__(self, model=None, module_name=None, class_name=None, pos_args=None, kwargs=None):
+         if not model and not module_name and not class_name:
+             raise TeradataMlException(Messages.get_message(MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT, "model",
+                                                            "module_name and class_name"),
+                                       MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)
+
+         validator._validate_mutually_inclusive_arguments(module_name, "module_name",
+                                                          class_name, "class_name")
+
+         super().__init__()
+
+         self.module_name = module_name
+         self.class_name = class_name
+         self.kwargs = kwargs if kwargs is not None else {}
+         self.pos_args = pos_args if pos_args is not None else tuple()
+
+         self._fit_label_columns_types = None
+         self._table_name_prefix = None
+
+         self._is_default_partition_value_fit = True  # False when the user provides partition columns.
+         self._fit_partition_columns_non_default = None
+         self._is_default_partition_value_predict = True  # False when the user provides partition columns.
+
+     def _validate_equality_of_partition_values(self, fit_values, trans_values):
+         """
+         Internal function to check that the partition values used in fit() and predict()
+         are the same.
+         """
+         if len(fit_values) != len(trans_values):
+             return False
+
+         for val in fit_values:
+             if val not in trans_values:
+                 return False
+
+         return True
+
+     def _validate_unique_partition_values(self, data, partition_columns):
+         """
+         Internal function to validate that the unique values in partition_columns used
+         in fit() and predict() are the same.
+         """
+         data._index_label = None
+         unique_values = data.drop_duplicate(partition_columns).get_values()
+
+         trans_unique_values = sorted(unique_values.tolist(), key=lambda x: tuple(x))
+         fit_unique_values = sorted(self._fit_partition_unique_values.tolist() \
+                                    if not isinstance(self._fit_partition_unique_values, list) \
+                                    else self._fit_partition_unique_values, key=lambda x: tuple(x))
+         default_unique_values = [[self._default_data_partition_value]]
+
+         if fit_unique_values == default_unique_values and \
+                 trans_unique_values != default_unique_values:
+             error_msg = Messages.get_message(MessageCodes.PARTITION_IN_BOTH_FIT_AND_PREDICT,
+                                              "without", "with")
+             msg_code = MessageCodes.PARTITION_IN_BOTH_FIT_AND_PREDICT
+             raise TeradataMlException(error_msg, msg_code)
+
+         if not self._validate_equality_of_partition_values(fit_unique_values, trans_unique_values):
+             raise TeradataMlException(
+                 Messages.get_message(MessageCodes.PARTITION_VALUES_NOT_MATCHING),
+                 MessageCodes.PARTITION_VALUES_NOT_MATCHING
+             )
+
+     def fit(self, **kwargs):
+         pass
+
+     def __get_obj_attributes_multi_model(self, name):
+         """
+         Internal function to get attributes of all sklearn model objects when multiple
+         models are generated by fit.
+         """
+         # Wrapper function to invoke the dynamic method, using arguments
+         # passed by the user, on the model in each row.
+         def __sklearn_method_invoker_for_multimodel(*c, **kwargs):
+             multi_models = self.modelObj.copy()
+             for i in range(multi_models.shape[0]):
+                 curr_model = multi_models.iloc[i]["model"]
+                 multi_models.at[i, "model"] = getattr(curr_model, name)(*c, **kwargs)
+             return multi_models.rename(columns={"model": name})
+
+         # Identify once whether the attribute is callable, to avoid repeating
+         # this check in the loop for every model.
+         # Assuming that self.modelObj will have at least 1 row.
+         is_attr_callable = callable(getattr(self.modelObj.iloc[0]["model"], name))
+
+         # If the attribute is callable, it should be applied to the model in each row
+         # using the passed arguments.
+         if is_attr_callable:
+             return __sklearn_method_invoker_for_multimodel
+
+         output_attributes = self.modelObj.copy()
+         for i in range(output_attributes.shape[0]):
+             model = output_attributes.iloc[i]["model"]
+             output_attributes.at[i, "model"] = getattr(model, name)
+         return output_attributes.rename(columns={"model": name})
+
+     def __getattr__(self, name):
+         # This just runs attributes (functions and properties) from the sklearn object.
+         def __sklearn_method_invoker(*c, **kwargs):
+             return attribute_instance(*c, **kwargs)
+
+         if isinstance(self.modelObj, pd.DataFrame):
+             return self.__get_obj_attributes_multi_model(name)
+
+         attribute_instance = getattr(self.modelObj, name)
+         if callable(attribute_instance):
+             return __sklearn_method_invoker
+         return attribute_instance
+
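A standalone sketch of this delegation pattern for the single-model case (hypothetical classes; unknown attributes resolve against the underlying estimator, with callables re-wrapped so calls pass through):

    class Underlying:
        coef_ = [1.0, 2.0]
        def score(self):
            return 0.9

    class Wrapper:
        def __init__(self, obj):
            self.obj = obj
        def __getattr__(self, name):          # only hit when normal lookup fails
            attr = getattr(self.obj, name)
            return (lambda *a, **kw: attr(*a, **kw)) if callable(attr) else attr

    w = Wrapper(Underlying())
    print(w.coef_)     # [1.0, 2.0]  (property passthrough)
    print(w.score())   # 0.9         (method passthrough)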
+     @classmethod
+     def _validate_model_supportability(cls, model):
+         """
+         Internal function to validate whether the model provided for deployment is
+         supported by teradataml's opensourceML.
+         """
+         error_msg = Messages.get_message(MessageCodes.MODEL_CATALOGING_OPERATION_FAILED, "validate",
+                                          "The given model is not a supported opensource model.")
+         msg_code = MessageCodes.MODEL_CATALOGING_OPERATION_FAILED
+         try:
+             # For scikit-learn, model.__module__ looks like 'sklearn.linear_model._base'.
+             # TODO: check for other supported packages.
+             if model.__module__.split(".")[0] not in OpenSourcePackage.values():
+                 raise TeradataMlException(error_msg, msg_code)
+         except Exception as ex:
+             # In case accessing model.__module__ fails.
+             raise TeradataMlException(error_msg, msg_code) from ex
+
+     def _save_model(self, model_name, replace_if_exists=False):
+         """
+         Internal function to save the model held by this wrapper to Vantage using the
+         BYOM methods save_byom() and delete_byom(), based on the value of the
+         "replace_if_exists" argument.
+         """
+         # Create a table in Vantage, if it doesn't exist, to store the model info.
+         conn = get_connection()
+         osml_models_table_exists = conn.dialect.has_table(conn,
+                                                           table_name=_OSML_MODELS_TABLE_NAME,
+                                                           schema=self._db_name)
+         if not osml_models_table_exists:
+             all_columns = _OSML_MODELS_TABLE_COLUMNS_TYPE_DICT.copy()
+             all_columns.update(_OSML_ADDITIONAL_COLUMN_TYPES)
+             _create_table(table_name=_OSML_MODELS_TABLE_NAME, columns=all_columns,
+                           primary_index=_OSML_MODELS_PRIMARY_INDEX, schema_name=self._db_name)
+
+         model_obj = OpensourceModels(is_default_partition_value=self._is_default_partition_value_fit,
+                                      partition_file_prefix=self._model_file_name_prefix,
+                                      fit_partition_columns_non_default=self._fit_partition_columns_non_default,
+                                      model=self.modelObj,
+                                      pos_args=self.pos_args,
+                                      key_args=self.kwargs)
+
+         # Save the model object to a file, to be used by save_byom() for writing to the
+         # Vantage table.
+         file_name = os.path.join(self._tdml_tmp_dir, "deployed_file.pickle")
+         with open(file_name, "wb+") as fp:
+             fp.write(pickle.dumps(model_obj))
+
+         try:
+             save_byom(model_id=model_name,
+                       model_file=file_name,
+                       table_name=_OSML_MODELS_TABLE_NAME,
+                       additional_columns_types=_OSML_ADDITIONAL_COLUMN_TYPES,
+                       additional_columns={"package": self.OPENSOURCE_PACKAGE_NAME.value})
+         except TeradataMlException as ex:
+             model_exists_msg = Messages.get_message(MessageCodes.MODEL_ALREADY_EXISTS, model_name)
+             if not replace_if_exists and model_exists_msg == str(ex):
+                 raise
+             elif replace_if_exists and model_exists_msg == str(ex):
+                 # Delete the model from the model table and save it again.
+                 delete_byom(model_id=model_name, table_name=_OSML_MODELS_TABLE_NAME)
+                 save_byom(model_id=model_name,
+                           model_file=file_name,
+                           table_name=_OSML_MODELS_TABLE_NAME,
+                           additional_columns_types=_OSML_ADDITIONAL_COLUMN_TYPES,
+                           additional_columns={"package": self.OPENSOURCE_PACKAGE_NAME.value})
+             else:
+                 raise
+         finally:
+             os.remove(file_name)
+
+     @classmethod
+     def _deploy(cls, model_name, model, replace_if_exists=False):
+         """
+         Internal function to create an instance of the class using the model and deploy
+         the model to Vantage.
+         """
+         cls._validate_model_supportability(model=model)
+
+         obj = cls(model=model)
+         # Load the model file onto the Vantage node, as the file can be used in
+         # predict or other operations.
+         obj._install_initial_model_file()
+
+         obj._save_model(model_name, replace_if_exists)
+
+         return obj
+
+     @classmethod
+     def _load(cls, model_name):
+         """
+         Internal function to load the model corresponding to the package (like sklearn
+         etc.) from Vantage to the client using retrieve_byom(), and create an instance
+         of the class if the model is from the same package.
+         """
+         try:
+             model = retrieve_byom(model_id=model_name, table_name=_OSML_MODELS_TABLE_NAME,
+                                   return_addition_columns=True)
+         except TeradataMlException as ex:
+             # Not showing the table name in the error message as it is an internal table.
+             part_msg = f"Model '{model_name}' not found in the table "
+             if part_msg in str(ex):
+                 raise TeradataMlException(Messages.get_message(MessageCodes.MODEL_NOT_FOUND, model_name, ""),
+                                           MessageCodes.MODEL_NOT_FOUND)
+             raise
+
+         model_vals_list = model.get_values()[0]
+         # List of elements (the model name is the index column):
+         # - the 1st contains the model object with fields: is_default_partition_value,
+         #   partition_file_prefix, model, etc.
+         # - the 2nd contains the package name.
+         model_obj = pickle.loads(model_vals_list[0])
+         model = model_obj.model
+         package = model_vals_list[1]
+
+         if package != cls.OPENSOURCE_PACKAGE_NAME.value:
+             # Raise an error when trying to access a model of a different package.
+             raise TeradataMlException(Messages.get_message(MessageCodes.MODEL_NOT_FOUND, model_name,
+                                                            f". Requested model is from '{package}' package"),
+                                       MessageCodes.MODEL_NOT_FOUND)
+
+         if isinstance(model, pd.DataFrame):
+             # Create a new instance of the class and set the model object on the instance.
+             # Instantiation can take only a model, not a model object. Hence, one of the
+             # models from the pandas df is passed; modelObj and other fields are updated later.
+             obj = cls(model=model.iloc[1, 2])
+             obj.modelObj = model
+             obj._fit_partition_unique_values = [lst[:-1] for lst in model.values.tolist()]
+         else:
+             obj = cls(model=model)
+
+         obj._model_file_name_prefix = model_obj.partition_file_prefix
+         obj._is_default_partition_value_fit = model_obj.is_default_partition_value
+         obj._fit_partition_columns_non_default = model_obj.fit_partition_columns_non_default
+         obj.pos_args = model_obj.pos_args
+         obj.kwargs = model_obj.key_args
+
+         # Load the model file onto the Vantage node, as the file can be used in
+         # predict or other operations.
+         obj._install_initial_model_file()
+
+         return obj
+
+     def deploy(self, model_name, replace_if_exists=False):
+         """
+         DESCRIPTION:
+             Deploys the model held by the interface object to Vantage.
+
+         PARAMETERS:
+             model_name:
+                 Required Argument.
+                 Specifies the unique name of the model to be deployed.
+                 Types: str
+
+             replace_if_exists:
+                 Optional Argument.
+                 Specifies whether to replace the model if a model with the same name already
+                 exists in Vantage. If this argument is set to False and a model with the same
+                 name already exists, then the function raises an exception.
+                 Default Value: False
+                 Types: bool
+
+         RETURNS:
+             The opensource object wrapper.
+
+         RAISES:
+             TeradataMlException if a model with "model_name" already exists and the argument
+             "replace_if_exists" is set to False.
+
+         EXAMPLES:
+             >>> from teradataml import td_sklearn
+             >>> model = td_sklearn.LinearRegression(normalize=True)
+             >>> model
+             LinearRegression(normalize=True)
+
+             # Example 1: Deploy the model held by the interface object to Vantage.
+             >>> lin_reg = model.deploy("linreg_model_ver_2")
+             Model is saved.
+             >>> lin_reg
+             LinearRegression(normalize=True)
+
+             # Example 2: Deploy the model held by the interface object to Vantage with the
+             # same name as a model that already exists in Vantage.
+             >>> lin_reg = model.deploy("linreg_model_ver_2", replace_if_exists=True)
+             Model is deleted.
+             Model is saved.
+             >>> lin_reg
+             LinearRegression(normalize=True)
+         """
+         # Install the model file into Vantage, if not installed.
+         self._install_initial_model_file()
+
+         self._save_model(model_name, replace_if_exists)
+         return self
+
+
+ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
+
+     OPENSOURCE_PACKAGE_NAME = OpenSourcePackage.SKLEARN
+
+     def __init__(self, model=None, module_name=None, class_name=None, pos_args=None, kwargs=None):
+         super().__init__(model=model, module_name=module_name, class_name=class_name,
+                          pos_args=pos_args, kwargs=kwargs)
+
+         self._initialize_variables()
+         if model:
+             self.modelObj = model
+             self.module_name = model.__module__.split("._")[0]
+             self.class_name = model.__class__.__name__
+             # __dict__ gets all the arguments as a dictionary, including default ones and
+             # positional args.
+             self.kwargs = model.__dict__
+             self.pos_args = tuple()  # Kept empty as all arguments are moved to kwargs.
+         else:
+             self._initialize_object()
+
+     def __repr__(self):
+         if self._is_default_partition_value_fit:
+             # Single model use case.
+             return self.modelObj.__repr__()
+
+         pd.set_option("display.expand_frame_repr", None)
+         pd.set_option("display.max_colwidth", None)
+         opt = self.modelObj.__repr__()
+         pd.reset_option("display.expand_frame_repr")
+         pd.reset_option("display.max_colwidth")
+         return opt
+
+     def _validate_args_and_get_data(self, X=None, y=None, groups=None, kwargs={},
+                                     skip_either_or_that=False):
+         """
+         Internal function to validate the arguments passed to exposed opensource APIs and
+         return the parent DataFrame, feature columns, label columns, group columns and
+         data partition columns.
+         """
+         _validate_opensource_func_args(X=X, y=y, groups=groups,
+                                        fit_partition_cols=self._fit_partition_columns_non_default,
+                                        kwargs=kwargs,
+                                        skip_either_or_that=skip_either_or_that)
+         return _derive_df_and_required_columns(X=X, y=y, groups=groups, kwargs=kwargs,
+                                                fit_partition_cols=self._fit_partition_columns_non_default)
+
+     def _initialize_object(self):
+         """
+         Internal function to initialize the sklearn object from the module name and
+         class name.
+         """
+         # Needed when writing imported modules to the generated file. TODO: Remove later.
+         imported_args = {}
+         # If there are any objects of class `_SkLearnObjectWrapper`, they are modified to
+         # the corresponding sklearn objects.
+         new_sklearn_pos_args = self.modify_args(None, self.pos_args, imported_args)
+         new_sklearn_kwargs = self.modify_args(None, self.kwargs, imported_args)
+
+         # Create the model object from the new positional and keyword arguments.
+         class_obj = getattr(import_module(self.module_name), self.class_name)
+         if new_sklearn_pos_args:
+             self.modelObj = class_obj(*new_sklearn_pos_args, **new_sklearn_kwargs)
+         else:
+             self.modelObj = class_obj(**new_sklearn_kwargs)
+
+         # All arguments are moved to kwargs and pos_args is kept empty.
+         # Might help in the set_params() bug fix.
+         self.pos_args = tuple()
+         _arguments = self.modelObj.__dict__
+
+         if hasattr(self.modelObj, "get_params"):
+             # Update only the kwargs that are both in modelObj and get_params(), as some
+             # classes also return other internal variables.
+             # Hence, they are filtered using get_params().
+             for k, v in _arguments.items():
+                 if type(v).__name__ in ["function", "generator"]:
+                     # TODO: ELE-6351: Skip adding functions and generators to kwargs as these
+                     # are not supported yet due to a pickling issue.
+                     continue
+                 if k in self.get_params():
+                     self.kwargs[k] = v
+         else:
+             # Model selection classes will not have `get_params`, in which case modelObj's
+             # __dict__ is saved as kwargs.
+             self.kwargs = _arguments
+
+     def _initialize_variables(self):
+         """
+         Internal function to initialize the variables used in this class.
+         """
+         self.feature_names_in_ = None
+         self._table_name_prefix = "td_sklearn_"
+         self._model_file_name_prefix = _generate_new_name(type="file")
+         self.model_file_paths_local = set()
+
+         self._fit_execution_time = None
+         self._fit_predict_execution_time = None
+         self._partial_fit_execution_time = None
+         self._predict_execution_time = None
+         self._transform_execution_time = None
+         self._score_execution_time = None
+
+         # Set to the partition columns when training is done with partition columns.
+         self._fit_partition_columns_non_default = None
+
+         self._is_model_installed = False
+         self._fit_partition_unique_values = [[self._default_data_partition_value]]
+
+     def modify_args(self, fp1, arg, imported_args):
+         """
+         Internal function to recursively (if "arg" is a list/tuple/dict) check whether any
+         opensourceML sklearn object is present in the argument "arg" and modify it to the
+         corresponding sklearn object.
+         This function can also be used to write import statements to a file (if "fp1" is
+         not None). The "imported_args" dictionary is updated with the imported module and
+         class name to avoid importing the same module and class again when writing to the
+         file. This is useful when generating a script from a template file.
+         Pass None for "fp1" to skip writing to a file and just modify the opensourceML
+         sklearn object to the corresponding sklearn object.
+         """
+         if isinstance(arg, type(self)):
+             imported_tuple = (arg.module_name, arg.class_name)
+             already_imported = imported_args.get(imported_tuple, False)
+             if not already_imported:
+                 imported_args[imported_tuple] = True
+                 if fp1:
+                     fp1.write(f"from {arg.module_name} import {arg.class_name}\n")
+             self.modify_args(fp1, arg.pos_args, imported_args)
+             self.modify_args(fp1, arg.kwargs, imported_args)
+             return arg.modelObj
+         elif isinstance(arg, list):
+             return [self.modify_args(fp1, val, imported_args) for val in arg]
+         elif isinstance(arg, tuple):
+             return tuple(self.modify_args(fp1, val, imported_args) for val in arg)
+         elif type(arg).__name__ == "generator":
+             # Raise an exception as generator objects can't be pickled.
+             # TODO: ELE-6351 - Find ways to pickle generator objects later.
+             raise ValueError("Generator type/iterator is not supported for any argument. "\
+                              "Support will be added later.")
+         elif type(arg).__name__ == "function":
+             # Raise an exception as functions/lambda functions can't be pickled.
+             # TODO: ELE-6351 - Find ways to pickle functions later.
+             raise ValueError("Functions are not supported for any argument. "\
+                              "Support will be added later.")
+         elif isinstance(arg, dict):
+             return dict(
+                 (
+                     self.modify_args(fp1, k, imported_args),
+                     self.modify_args(fp1, v, imported_args),
+                 )
+                 for k, v in arg.items()
+             )
+         else:
+             return arg
+
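A standalone sketch of the recursive unwrapping idea (Wrapper is a hypothetical stand-in for _SkLearnObjectWrapper; nested containers are walked and wrapper instances are replaced by their underlying objects, which is what lets wrapped estimators nest inside pipeline- or ensemble-style arguments):

    class Wrapper:
        def __init__(self, obj):
            self.obj = obj

    def unwrap(arg):
        if isinstance(arg, Wrapper):
            return arg.obj
        if isinstance(arg, list):
            return [unwrap(v) for v in arg]
        if isinstance(arg, tuple):
            return tuple(unwrap(v) for v in arg)
        if isinstance(arg, dict):
            return {unwrap(k): unwrap(v) for k, v in arg.items()}
        return arg

    print(unwrap({"estimators": [("lr", Wrapper("LinearRegression()"))]}))
    # {'estimators': [('lr', 'LinearRegression()')]}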
+     def _install_initial_model_file(self):
+         """
+         If the model file(s) is/are not installed in Vantage, then install it/them.
+         """
+         if isinstance(self.modelObj, pd.DataFrame):
+             # Get the unique partition values and the corresponding model objects as a dict.
+             partition_values_model_dict = {}
+             obj_list = self.modelObj.values.tolist()
+             for lst in obj_list:
+                 partition_values_model_dict[tuple(lst[:-1])] = lst[-1]
+
+         for partition in self._fit_partition_unique_values:
+             # Create a new file whose name contains the partition values and
+             # dump the sklearn object into it. Finally, install the file to Vantage.
+             partition_join = "_".join([str(x) for x in partition])
+             file_name = f"{self._model_file_name_prefix}_{partition_join}"
+             # Replace '-' with '_' as '-' can't be present in a file identifier.
+             # This replace is needed because partition values can be negative.
+             file_name = file_name.replace("-", "_")
+             full_file_name = os.path.join(self._tdml_tmp_dir, file_name)
+             with open(full_file_name, "wb+") as fp:
+                 # Write the sklearn object to the file.
+                 if isinstance(self.modelObj, pd.DataFrame):
+                     # If multiple models, write the model corresponding to the partition value.
+                     fp.write(pickle.dumps(partition_values_model_dict[tuple(partition)]))
+                 else:
+                     fp.write(pickle.dumps(self.modelObj))
+             self.model_file_paths_local.add(file_name)
+
+             self._install_script_file(file_identifier=file_name,
+                                       file_name=file_name,
+                                       is_binary=True,
+                                       file_location=self._tdml_tmp_dir)
+
+             if self._is_lake_system:
+                 # Pass env_name along with file_name for cleaning up the files in the env.
+                 obj = f"{self._env.env_name}::{file_name}"
+                 if installed_model_files[obj] == 0:
+                     # Add to GC the first time the model file (along with env name) is encountered.
+                     installed_model_files[obj] = 1
+                     GarbageCollector._add_to_garbagecollector(object_name=obj,
+                                                               object_type=TeradataConstants.TERADATA_APPLY)
+             else:
+                 if installed_model_files[file_name] == 0:
+                     # Add to GC the first time the model file is encountered.
+                     installed_model_files[file_name] = 1
+                     GarbageCollector._add_to_garbagecollector(object_name=file_name,
+                                                               object_type=TeradataConstants.TERADATA_SCRIPT)
+
+         self._is_model_installed = True
+
811
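+    # Editor's note (illustrative): assuming a model file prefix "td_sklearn_model" and
+    # a fitted partition value of (1, -2), _install_initial_model_file() would install a
+    # file named "td_sklearn_model_1__2" (the '-' in -2 becomes '_').
+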
+    def _run_fit_related_functions(self,
+                                   data,
+                                   feature_columns,
+                                   label_columns,
+                                   partition_columns,
+                                   func,
+                                   classes=None):
+        """
+        Internal function to run the fit() and partial_fit() functions.
+        """
+        label_columns = self._get_columns_as_list(label_columns)
+
+        data, new_partition_columns = self._get_data_and_data_partition_columns(data,
+                                                                                feature_columns,
+                                                                                label_columns,
+                                                                                partition_columns)
+
+        model_type = BLOB() if self._is_lake_system else CLOB()
+        return_types = [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
+                        for col in new_partition_columns] + [("model", model_type)]
+
+        file_name = "sklearn_fit.py"
+        self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
+
+        if classes:
+            class_type = type(classes[0]).__name__
+            classes = "--".join([str(x) for x in classes])
+        else:
+            classes = str(None)
+            class_type = str(None)
+
+        data_column_types_str, partition_indices_str, _, new_partition_columns = \
+            self._get_data_col_types_and_partition_col_indices_and_types(data, new_partition_columns)
+
+        # db_name is applicable only for the enterprise system.
+        db_file_name = file_name if self._is_lake_system else f"./{self._db_name}/{file_name}"
+        py_exc = UtilFuncs._get_python_execution_path()
+        script_command = f"{py_exc} {db_file_name} {func} {len(feature_columns)} "\
+                         f"{len(label_columns)} {partition_indices_str} {data_column_types_str} "\
+                         f"{self._model_file_name_prefix} {classes} {class_type} {self._is_lake_system}"
+
+        # Get unique values in the partitioning columns.
+        self._fit_partition_unique_values = data.drop_duplicate(new_partition_columns).get_values()
+
+        self._install_initial_model_file()
+
+        self._model_data = self._run_script(data, script_command, new_partition_columns,
+                                            return_types)
+
+        # Extract sklearn object(s) from the result, depending on the number of unique
+        # partitioning values.
+        self.extract_sklearn_obj(n_unique_partitions=len(self._fit_partition_unique_values),
+                                 n_partition_cols=len(new_partition_columns))
+
+        # The label column types are needed later in prediction.
+        self._fit_label_columns_types = [data._td_column_names_and_sqlalchemy_types[l_c.lower()]
+                                         for l_c in label_columns]
+
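+    # Editor's note (illustrative): on an enterprise system with database "mydb", two
+    # feature columns and one label column, the generated command would look roughly
+    # like (placeholders for the values computed above):
+    #   <python_path> ./mydb/sklearn_fit.py fit 2 1 <partition_indices> <column_types> \
+    #                 <model_prefix> None None False
+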
+    def partial_fit(self, X=None, y=None, classes=None, **kwargs):
+        """
+        Please check the description in Docs/OpensourceML/sklearn.py.
+        """
+        st_time = time.time()
+
+        # "classes" argument validation.
+        arg_info_matrix = []
+        arg_info_matrix.append(["classes", classes, True, list])
+        _Validators._validate_function_arguments(arg_info_matrix)
+
+        self._is_default_partition_value_fit = True  # False when the user provides partition columns.
+
+        data, feature_columns, label_columns, _, partition_columns = \
+            self._validate_args_and_get_data(X=X, y=y, groups=None, kwargs=kwargs)
+
+        if partition_columns:
+            self._is_default_partition_value_fit = False
+            self._fit_partition_colums_non_default = partition_columns
+
+        self._run_fit_related_functions(data,
+                                        feature_columns,
+                                        label_columns,
+                                        partition_columns,
+                                        inspect.stack()[0][3],
+                                        classes)
+
+        self._partial_fit_execution_time = time.time() - st_time
+
+        return self
+
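+    # Editor's note (illustrative usage, hypothetical DataFrame/column names):
+    #   sgd = td_sklearn.SGDClassifier()
+    #   sgd.partial_fit(X=df.select(["f1", "f2"]), y=df.select(["label"]),
+    #                   classes=[0, 1])   # pass all classes on the first batch
+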
+    def fit(self, X=None, y=None, **kwargs):
+        """
+        Please check the description in Docs/OpensourceML/sklearn.py.
+        """
+        st_time = time.time()
+
+        self._is_default_partition_value_fit = True  # False when the user provides partition columns.
+
+        data, feature_columns, label_columns, _, partition_columns = \
+            self._validate_args_and_get_data(X=X, y=y, groups=None, kwargs=kwargs)
+
+        if partition_columns:
+            self._is_default_partition_value_fit = False
+            self._fit_partition_colums_non_default = partition_columns
+
+        self._run_fit_related_functions(data,
+                                        feature_columns,
+                                        label_columns,
+                                        partition_columns,
+                                        inspect.stack()[0][3])
+
+        self._fit_execution_time = time.time() - st_time
+
+        return self
+
+    def set_params(self, **params):
+        """
+        Please check the description in Docs/OpensourceML/sklearn.py.
+        """
+        for key, val in params.items():
+            self.kwargs[key] = val
+
+        # Initialize with the new arguments and return the class/model object.
+        # set_params takes all keyword arguments and no positional arguments.
+        self.__init__(None, self.module_name, self.class_name, tuple(), self.kwargs)
+        return self
+
+    # get_params() will be executed through __getattr__().
+
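+    # Editor's note (illustrative): set_params() re-initializes the wrapper, so it can
+    # be chained, e.g.
+    #   dt = td_sklearn.DecisionTreeClassifier(max_depth=3)
+    #   dt = dt.set_params(max_depth=5, criterion="entropy")
+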
+    # @_validate_fit_run
+    def __getattr__(self, name):
+        def __run_transform(*c, **kwargs):
+            kwargs["name"] = name
+            return self._transform(*c, **kwargs)
+
+        def __run_function_needing_all_rows(*c, **kwargs):
+            kwargs["name"] = name
+            return self._run_function_needing_all_rows(*c, **kwargs)
+
+        def __run_kneighbors(*c, **kwargs):
+            kwargs["name"] = name
+            return self._run_neighbors(*c, **kwargs)
+
+        if name in ["score", "aic", "bic", "perplexity"]:
+            # TODO: ELE-6352 - Implement error_norm() function later.
+            return __run_function_needing_all_rows
+
+        if name in ["kneighbors",
+                    "radius_neighbors",
+                    "kneighbors_graph",
+                    "radius_neighbors_graph"]:
+            return __run_kneighbors
+
+        if name in ["predict",
+                    "transform",
+                    "inverse_transform",
+                    "predict_proba",
+                    "predict_log_proba",
+                    "decision_function",
+                    "score_samples",
+                    "decision_path",
+                    "apply",
+                    "cost_complexity_pruning_path",
+                    "gibbs",
+                    "kneighbors_graph",
+                    "radius_neighbors_graph",
+                    "mahalanobis",
+                    "correct_covariance",
+                    "reweight_covariance",
+                    "path"]:
+            return __run_transform
+
+        return super().__getattr__(name)
+
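+    # Editor's note (illustrative): attribute access drives the dispatch above, e.g.
+    #   model.score(...)      -> _run_function_needing_all_rows(name="score", ...)
+    #   knn.kneighbors(...)   -> _run_neighbors(name="kneighbors", ...)
+    #   model.predict(...)    -> _transform(name="predict", ...)
+    # Anything else (e.g. get_params) falls through to super().__getattr__().
+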
+    def _get_return_columns_for_function_(self,
+                                          data,
+                                          feature_columns,
+                                          label_columns,
+                                          func_name,
+                                          kwargs):
+        """
+        Internal function to return the list of column names and their sqlalchemy types
+        which should be used in the return_types of Script.
+        """
+        if func_name == "fit_predict":
+            # Get return columns using label_columns.
+            return [(f"{self.class_name.lower()}_{func_name}_{(i + 1)}",
+                     data._td_column_names_and_sqlalchemy_types[col.lower()])
+                    for i, col in enumerate(label_columns)]
+        if func_name == "predict":
+            # Return predict columns using either label_columns (if provided) or
+            # self._fit_label_columns_types (if the function was trained using label
+            # columns). Otherwise, fall through and run predict on ten rows of data to
+            # get the number of columns and their types.
+            if label_columns:
+                return [(f"{self.class_name.lower()}_{func_name}_{(i + 1)}",
+                         data._td_column_names_and_sqlalchemy_types[col.lower()])
+                        for i, col in enumerate(label_columns)]
+            if self._fit_label_columns_types:
+                return [(f"{self.class_name.lower()}_{func_name}_{(i + 1)}", col_type)
+                        for i, col_type in enumerate(self._fit_label_columns_types)]
+
+        data = data.select(feature_columns + label_columns)
+
+        # If the function is not `fit_predict`, execute it on up to ten rows of data in
+        # the client to get the number of columns in the return clause and their
+        # Vantage types.
+        n_f = len(feature_columns)
+        n_c = len(label_columns)
+
+        # For multiple partitioning columns, modelObj is a DataFrame and
+        # getattr(modelObj, func_name) fails. Just for getting the number of columns
+        # and their types, use only one model of all.
+        if len(self._fit_partition_unique_values) == 1:
+            # Single model case.
+            skl_obj = self.modelObj
+        else:
+            # Multi model case.
+            skl_obj = self.modelObj.iloc[0]["model"]
+
+        ten_row_data = data.head(10).get_values()
+        X = numpy.array(ten_row_data)
+        if label_columns:
+            y = X[:, n_f: n_f + n_c]
+            X = X[:, :n_f]
+            # predict() now also takes 'y' so that the script can return the labels.
+            # Generally, 'y' is passed to return y along with the actual output; skip
+            # 'y' in the local run when the function does not accept it.
+            try:
+                trans_opt = getattr(skl_obj, func_name)(X, y, **kwargs)
+            except TypeError:
+                # Functions which do not accept 'y', like predict_proba(), raise an error
+                # like "predict_proba() takes 2 positional arguments but 3 were given".
+                trans_opt = getattr(skl_obj, func_name)(X, **kwargs)
+        else:
+            trans_opt = getattr(skl_obj, func_name)(X, **kwargs)
+
+        if func_name == "path":
+            raise NotImplementedError(
+                "path() returns a tuple of ndarrays of different shapes. Not implemented yet."
+            )
+
+        if isinstance(trans_opt, numpy.ndarray) and trans_opt.shape == (X.shape[0],):
+            trans_opt = trans_opt.reshape(X.shape[0], 1)
+
+        if type(trans_opt).__name__ in ["csr_matrix", "csc_matrix"]:
+            no_of_columns = trans_opt.get_shape()[1]
+            trans_opt = trans_opt.toarray()
+        elif isinstance(trans_opt, dict):
+            raise NotImplementedError(f"Output returns dictionary {trans_opt}. NOT implemented yet.")
+        elif isinstance(trans_opt[0], (numpy.ndarray, list, tuple)):
+            no_of_columns = len(trans_opt[0])
+        else:
+            no_of_columns = 1
+
+        # Special handling when inverse_transform returns rows with fewer columns than
+        # the number of classes. Such columns are filled with NaN values.
+        # Update the number of columns here (new columns with NaN values will be added).
+        if func_name == "inverse_transform" and self.class_name == "MultiLabelBinarizer":
+            no_of_columns = len(self.classes_)
+            for i in range(len(ten_row_data)):
+                trans_opt[i] += tuple([numpy.nan] * (no_of_columns - len(trans_opt[i])))
+
+        # Special handling is required for the transform function of cross_decomposition
+        # classes, which also takes label columns. In that case, the output is a tuple of
+        # numpy arrays - x_scores and y_scores. If label columns are not provided, only
+        # x_scores are returned.
+        if self.module_name == "sklearn.cross_decomposition" and func_name == "transform":
+            # For cross_decomposition, the output is a tuple of arrays when label columns
+            # are provided along with feature columns for the transform function. In this
+            # case, concatenate the arrays and return the column names accordingly.
+            if isinstance(trans_opt, tuple):  # Tuple when label_columns is provided.
+                assert trans_opt[0].shape == trans_opt[1].shape, \
+                    "Output arrays should be of the same shape when transform/fit_transform "\
+                    "is run with label columns for cross_decomposition classes."
+                first_cols = [f"x_scores_{(i + 1)}" for i in range(trans_opt[0].shape[1])]
+                second_cols = [f"y_scores_{(i + 1)}" for i in range(trans_opt[1].shape[1])]
+                no_of_columns = trans_opt[0].shape[1] + trans_opt[1].shape[1]
+                col_names = first_cols + second_cols
+
+                trans_opt = numpy.concatenate(trans_opt, axis=1)
+            else:
+                assert isinstance(trans_opt, numpy.ndarray), "When transform/fit_transform is run "\
+                    "without label columns for cross_decomposition classes, "\
+                    "the output should be a numpy array."
+                no_of_columns = trans_opt.shape[1]
+                col_names = [f"x_scores_{(i + 1)}" for i in range(trans_opt.shape[1])]
+        else:
+            # Generate the list of new column names.
+            col_names = [f"{self.class_name.lower()}_{func_name}_{(i + 1)}" for i in range(no_of_columns)]
+
+        # Get new column sqlalchemy types for the pandas df columns of the transform output.
+        opt_pd = pd.DataFrame(trans_opt)
+
+        # Get output column types for each column in the pandas df from the output of
+        # transform type functions.
+        types = {}
+        for idx, col in enumerate(list(opt_pd.columns)):
+            # Get the type of the column using data from all rows, in case the column
+            # has None values. 'and'-ing the types of all values in the column with
+            # type(None) gives the type of the column.
+            type_ = type(None)
+            for i in range(len(trans_opt)):
+                type_ = type_ and type(trans_opt[i][idx])
+
+            # If all the values of the output (trans_opt) are None, then use `str` as
+            # the type since pandas astype() does not accept the None type.
+            if type_ is type(None):
+                type_ = str
+
+            # numpy integer columns with nan values can't be typecast to int64 using
+            # pd.astype(). It raises an error like "Cannot convert non-finite values
+            # (NA or inf) to integer: Error while type casting for column '2'".
+            # Hence, use pd.Int64Dtype() for integer columns with nan values.
+            types[col] = type_ if type_ != numpy.int64 else pd.Int64Dtype()
+
+        # Without this, all columns would be of object type and get converted to VARCHAR in Vantage.
+        opt_pd = opt_pd.astype(types)
+
+        # If the dtype is datetime64 and a timezone is present, map the column to
+        # TIMESTAMP(timezone=True); otherwise map it according to the default mapping.
+        col_types = [TIMESTAMP(timezone=True)
+                     if pt.is_datetime64_ns_dtype(opt_pd.dtypes[key]) and (opt_pd[col_name].dt.tz is not None)
+                     else _get_sqlalchemy_mapping(str(opt_pd.dtypes[key]))
+                     for key, col_name in enumerate(list(opt_pd.columns))]
+
+        return [(c_name, c_type) for c_name, c_type in zip(col_names, col_types)]
+
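+    # Editor's note (illustrative): for a hypothetical 2-component PCA wrapper, the
+    # local ten-row run of transform() yields a (10, 2) float array, so the inferred
+    # return columns would be roughly
+    #   [("pca_transform_1", FLOAT()), ("pca_transform_2", FLOAT())]
+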
+    @_validate_fit_run
+    def _run_function_needing_all_rows(self, X=None, y=None, **kwargs):
+        """
+        Internal function to run functions like score, aic and bic, which need all rows
+        and return a single floating-point number as the result.
+        """
+        st_time = time.time()
+
+        assert kwargs["name"], "function name should be passed."
+        func_name = kwargs["name"]
+
+        # Remove 'name' to pass the other kwargs to the script. TODO: Not passing it now.
+        kwargs.pop("name")
+
+        data, feature_columns, label_columns, _, partition_columns = \
+            self._validate_args_and_get_data(X=X, y=y, groups=None, kwargs=kwargs)
+
+        label_columns = self._get_columns_as_list(label_columns)
+
+        data, new_partition_columns = self._get_data_and_data_partition_columns(data,
+                                                                                feature_columns,
+                                                                                label_columns,
+                                                                                partition_columns)
+
+        file_name = "sklearn_score.py"
+        self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
+
+        script_file_path = f"{file_name}" if self._is_lake_system \
+            else f"./{self._db_name}/{file_name}"
+
+        data_column_types_str, partition_indices_str, _, new_partition_columns = \
+            self._get_data_col_types_and_partition_col_indices_and_types(data, new_partition_columns)
+
+        self._validate_unique_partition_values(data, new_partition_columns)
+
+        py_exc = UtilFuncs._get_python_execution_path()
+        script_command = f"{py_exc} {script_file_path} {func_name} {len(feature_columns)} "\
+                         f"{len(label_columns)} {partition_indices_str} {data_column_types_str} "\
+                         f"{self._model_file_name_prefix} {self._is_lake_system}"
+
+        # score, aic and bic return float values.
+        return_types = [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
+                        for col in new_partition_columns] + [(func_name, FLOAT())]
+
+        self._install_initial_model_file()
+
+        opt = self._run_script(data, script_command, new_partition_columns, return_types)
+
+        self._score_execution_time = time.time() - st_time
+
+        if self._is_default_partition_value_fit:
+            # For the single model case, the partition column is internally generated
+            # and there is no point in returning it to the user.
+            return opt.select(func_name)
+
+        return opt
+
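+    # Editor's note (illustrative): with the default (internally generated) partition,
+    # model.score(X=df_x, y=df_y) returns a one-column teradataml DataFrame named
+    # "score" holding a single float; with user partitions, one row per partition.
+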
+    @_validate_fit_run
+    def _transform(self, X=None, y=None, **kwargs):
+        """
+        Internal function to run predict/transform and similar functions, which return
+        multiple columns. Unlike sklearn's functions, which return only the output data,
+        this function returns the input row data along with the generated columns' data.
+        """
+        st_time = time.time()
+
+        assert kwargs["name"], "function name should be passed."
+        func_name = kwargs["name"]
+
+        # Remove 'name' to pass the other kwargs to the script. TODO: Not passing it now.
+        kwargs.pop("name")
+
+        data, feature_columns, label_columns, _, partition_columns = \
+            self._validate_args_and_get_data(X=X, y=y, groups=None, kwargs=kwargs)
+
+        data, new_partition_columns = self._get_data_and_data_partition_columns(data,
+                                                                                feature_columns,
+                                                                                label_columns,
+                                                                                partition_columns)
+
+        # Since kwargs are passed to transform, remove the unrelated data arguments
+        # from kwargs.
+        for key in ("data", "feature_columns", "group_columns",
+                    "partition_columns", "label_columns"):
+            kwargs.pop(key, None)
+
+        file_name = "sklearn_transform.py"
+        self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
+
+        script_file_path = f"{file_name}" if self._is_lake_system \
+            else f"./{self._db_name}/{file_name}"
+
+        data_column_types_str, partition_indices_str, _, new_partition_columns = \
+            self._get_data_col_types_and_partition_col_indices_and_types(data, new_partition_columns)
+
+        self._validate_unique_partition_values(data, new_partition_columns)
+
+        py_exc = UtilFuncs._get_python_execution_path()
+        script_command = f"{py_exc} {script_file_path} {func_name} {len(feature_columns)} "\
+                         f"{len(label_columns)} {partition_indices_str} {data_column_types_str} "\
+                         f"{self._model_file_name_prefix} {self._is_lake_system}"
+
+        # Return the feature columns along with the transformed columns because we don't
+        # know the mapping of feature columns to the transformed columns.
+        return_types = [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
+                        for col in (new_partition_columns + feature_columns)]
+        if func_name in ["predict", "decision_function"] and label_columns:
+            return_types += [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
+                             for col in label_columns]
+        return_types += self._get_return_columns_for_function_(data,
+                                                               feature_columns,
+                                                               label_columns,
+                                                               func_name,
+                                                               kwargs)
+
+        # Install the model files before running sklearn_transform.py.
+        self._install_initial_model_file()
+
+        opt = self._run_script(data, script_command, new_partition_columns, return_types)
+
+        self._transform_execution_time = time.time() - st_time
+
+        return self._get_returning_df(opt, new_partition_columns, return_types)
+
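+    # Editor's note (illustrative usage, hypothetical column names):
+    #   out = model.predict(X=df.select(["f1", "f2"]))
+    #   # `out` carries the input features followed by the generated column(s),
+    #   # e.g. columns: f1, f2, <classname>_predict_1
+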
+    def fit_predict(self, X=None, y=None, **kwargs):
+        """
+        Please check the description in Docs/OpensourceML/sklearn.py.
+        """
+        st_time = time.time()
+
+        self._is_default_partition_value_fit = True  # False when the user provides partition columns.
+
+        data, feature_columns, label_columns, _, partition_columns = \
+            self._validate_args_and_get_data(X=X, y=y, groups=None, kwargs=kwargs)
+
+        if partition_columns:
+            self._is_default_partition_value_fit = False
+
+        data, new_partition_columns = self._get_data_and_data_partition_columns(data,
+                                                                                feature_columns,
+                                                                                label_columns,
+                                                                                partition_columns)
+
+        # Also return label_columns if the user provides them in the function call.
+        return_types = [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
+                        for col in (new_partition_columns + feature_columns + label_columns)]
+
+        func_name = inspect.stack()[0][3]
+        if label_columns:
+            return_types += self._get_return_columns_for_function_(data,
+                                                                   feature_columns,
+                                                                   label_columns,
+                                                                   func_name,
+                                                                   {})
+        else:
+            # If there are no label_columns, there will be only one predicted column.
+            return_types += [(f"{self.class_name.lower()}_{func_name}_1", FLOAT())]
+
+        file_name = "sklearn_fit_predict.py"
+        self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
+
+        data_column_types_str, partition_indices_str, _, new_partition_columns = \
+            self._get_data_col_types_and_partition_col_indices_and_types(data, new_partition_columns)
+
+        script_file_name = f"{file_name}" if self._is_lake_system \
+            else f"./{self._db_name}/{file_name}"
+        py_exc = UtilFuncs._get_python_execution_path()
+        script_command = f"{py_exc} {script_file_name} {len(feature_columns)} "\
+                         f"{len(label_columns)} {partition_indices_str} {data_column_types_str} "\
+                         f"{self._model_file_name_prefix} {self._is_lake_system}"
+
+        # Get unique values in the partitioning columns.
+        self._fit_partition_unique_values = data.drop_duplicate(new_partition_columns).get_values()
+
+        self._install_initial_model_file()
+
+        opt = self._run_script(data, script_command, new_partition_columns, return_types)
+
+        self._fit_predict_execution_time = time.time() - st_time
+
+        if self._is_default_partition_value_fit:
+            # For the single model case, the partition column is internally generated
+            # and there is no point in returning it to the user.
+
+            # Extract columns from the return types.
+            returning_cols = [col[0] for col in return_types[len(new_partition_columns):]]
+            return opt.select(returning_cols)
+
+        return opt
+
+    def fit_transform(self, X=None, y=None, **kwargs):
+        """
+        Please check the description in Docs/OpensourceML/sklearn.py.
+        """
+        # 'y' is not needed for transform().
+        fit_obj = self.fit(X, y, **kwargs)
+        kwargs["label_columns"] = None
+        return fit_obj.transform(X, None, **kwargs)
+
+    @_validate_fit_run
+    def _run_neighbors(self, X=None, **kwargs):
+        """
+        Internal function to run functions like kneighbors, radius_neighbors,
+        kneighbors_graph and radius_neighbors_graph, which return multiple columns.
+        Unlike sklearn's functions, which return only the output data, this function
+        returns the input row data along with the generated columns' data.
+        """
+        assert kwargs["name"], "function name should be passed."
+        func_name = kwargs["name"]
+        kwargs.pop("name")
+
+        if self.module_name != "sklearn.neighbors":
+            raise AttributeError(f"{self.module_name+'.'+self.class_name} does not have {func_name}() method.")
+
+        data = kwargs.get("data", None)
+        partition_columns = kwargs.get("partition_columns", None)
+
+        if not X and not partition_columns and not data:
+            # If data is not passed, then run from the client only.
+            # TODO: decide whether to run from the client or from Vantage.
+            opt = super().__getattr__(func_name)(**kwargs)
+            from scipy.sparse import csr_matrix
+            if isinstance(opt, csr_matrix):
+                return opt.toarray()
+            return opt
+
+        self._is_default_partition_value_fit = True  # False when the user provides partition columns.
+
+        data, feature_columns, _, _, new_partition_columns = \
+            self._validate_args_and_get_data(X=X, y=None, groups=None, kwargs=kwargs,
+                                             skip_either_or_that=True)
+
+        # Remove the data-related arguments from kwargs, which are passed to the
+        # neighbors function.
+        input_data = kwargs.pop("data", None)
+        partition_cols = kwargs.pop("partition_columns", None)
+        feature_cols = kwargs.pop("feature_columns", None)
+        label_cols = kwargs.pop("label_columns", None)
+
+        if partition_columns:
+            # "partition_columns" has already been removed from kwargs above.
+            self._is_default_partition_value_fit = False
+
+        # Generate the new partition column name.
+        data, new_partition_columns = self._get_data_and_data_partition_columns(data,
+                                                                                feature_columns,
+                                                                                [],
+                                                                                partition_columns)
+
+        args_str = self._get_kwargs_str(kwargs)
+
+        file_name = "sklearn_neighbors.py"
+        self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
+
+        script_file_path = f"{file_name}" if self._is_lake_system \
+            else f"./{self._db_name}/{file_name}"
+
+        # Return the feature columns along with the new columns.
+        return_types = [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
+                        for col in (new_partition_columns + feature_columns)]
+
+        # `return_distance` is needed as the result is a tuple of two arrays when it is True.
+        return_distance = kwargs.get("return_distance", True)  # Default value is True.
+
+        # Though the new columns hold numpy arrays, they are returned as strings.
+        # TODO: Update to proper columns later, if requested.
+        if func_name in ['kneighbors', 'radius_neighbors']:
+            if return_distance:
+                return_types += [("neigh_dist", VARCHAR())]
+            return_types += [("neigh_ind", VARCHAR())]
+        elif func_name in ['kneighbors_graph', 'radius_neighbors_graph']:
+            return_types += [("A", VARCHAR())]
+        else:
+            return_types += [("output", VARCHAR())]
+
+        data_column_types_str, partition_indices_str, _, new_partition_columns = \
+            self._get_data_col_types_and_partition_col_indices_and_types(data, new_partition_columns)
+
+        py_exc = UtilFuncs._get_python_execution_path()
+        script_command = f"{py_exc} {script_file_path} {func_name} {len(feature_columns)} "\
+                         f"{partition_indices_str} {data_column_types_str} {self._model_file_name_prefix} {self._is_lake_system} "\
+                         f"{args_str}"
+
+        # Get unique values in the partitioning columns.
+        self._fit_partition_unique_values = data.drop_duplicate(new_partition_columns).get_values()
+
+        self._install_initial_model_file()
+
+        opt = self._run_script(data, script_command, new_partition_columns, return_types)
+
+        return self._get_returning_df(opt, new_partition_columns, return_types)
+
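+    # Editor's note (illustrative usage, hypothetical column names):
+    #   nn = td_sklearn.NearestNeighbors(n_neighbors=3)
+    #   nn.fit(X=df.select(["f1", "f2"]))
+    #   res = nn.kneighbors(X=df.select(["f1", "f2"]))
+    #   # `res` carries neigh_dist/neigh_ind as VARCHAR-encoded arrays per input row.
+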
+    def split(self, X=None, y=None, groups=None, **kwargs):
+        """
+        Please check the description in Docs/OpensourceML/sklearn.py.
+        """
+        opt = self._run_model_selection("split", X=X, y=y, groups=groups,
+                                        skip_either_or_that=True, kwargs=kwargs)
+
+        # Get the number of splits in the result DataFrame.
+        n_splits = opt.drop_duplicate("split_id").shape[0]
+
+        data = kwargs.get("data", None)
+        feature_columns = kwargs.get("feature_columns", [])
+        label_columns = self._get_columns_as_list(kwargs.get("label_columns", []))
+
+        # If "data" is not given, derive feature_columns and label_columns from X and y.
+        partition_columns = kwargs.get("partition_columns", [])
+        feature_columns = [col for col in X.columns if col not in partition_columns] \
+            if X and not data and not feature_columns else feature_columns
+        label_columns = y.columns if y and not data and not label_columns else label_columns
+
+        # Return an iterator of the train and test dataframes for each split.
+        for i in range(1, n_splits + 1):
+            train_df = opt[(opt.split_id == i) & (opt.data_type == "train")]\
+                .select(partition_columns + feature_columns + label_columns)
+            train_df._index_label = None
+            test_df = opt[(opt.split_id == i) & (opt.data_type == "test")]\
+                .select(partition_columns + feature_columns + label_columns)
+            test_df._index_label = None
+
+            yield train_df, test_df
+
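+    # Editor's note (illustrative usage, hypothetical column names):
+    #   kf = td_sklearn.KFold(n_splits=3)
+    #   for train_df, test_df in kf.split(X=df.select(["f1", "f2"]),
+    #                                     y=df.select(["label"])):
+    #       ...  # each iteration yields teradataml DataFrames for one split
+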
+    def get_n_splits(self, X=None, y=None, groups=None, **kwargs):
+        """
+        Please check the description in Docs/OpensourceML/sklearn.py.
+        """
+        return self._run_model_selection("get_n_splits", X=X, y=y, groups=groups,
+                                         skip_either_or_that=True, kwargs=kwargs)
+
+    def _run_model_selection(self,
+                             func_name,
+                             X=None,
+                             y=None,
+                             groups=None,
+                             skip_either_or_that=False,
+                             kwargs={}):
+        """
+        Internal function to run functions like split and get_n_splits of the
+        model_selection module.
+        - get_n_splits() returns the number of splits as a value, not as a teradataml
+          DataFrame.
+        - split() returns a teradataml DataFrame containing the train and test data for
+          each split (with partition information if the argument "partition_columns" is
+          provided).
+        """
+        if self.module_name != "sklearn.model_selection":
+            raise AttributeError(f"{self.module_name+'.'+self.class_name} does not "
+                                 f"have {func_name}() method.")
+
+        data = kwargs.get("data", None)
+
+        if not X and not y and not groups and not data:
+            # If data is not passed, then run from the client only.
+            # TODO: decide whether to run from the client or from Vantage.
+            return super().__getattr__(func_name)()
+
+        self._is_default_partition_value_fit = True  # False when the user provides partition columns.
+
+        data, feature_columns, label_columns, group_columns, partition_columns = \
+            self._validate_args_and_get_data(X=X, y=y, groups=groups, kwargs=kwargs,
+                                             skip_either_or_that=skip_either_or_that)
+
+        if partition_columns:
+            self._is_default_partition_value_fit = False
+
+        data, new_partition_columns = self._get_data_and_data_partition_columns(data,
+                                                                                feature_columns,
+                                                                                label_columns,
+                                                                                partition_columns,
+                                                                                group_columns)
+
+        file_name = "sklearn_model_selection_split.py"
+        self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
+
+        script_file_path = f"{file_name}" if self._is_lake_system \
+            else f"./{self._db_name}/{file_name}"
+
+        if func_name == "split":
+            # The data needs to be generated as splits of train and test.
+            # split_id  - the column which will be used to identify the split.
+            # data_type - the column which will be used to identify whether the row is a
+            #             train or test row.
+            return_types = [("split_id", INTEGER()), ("data_type", VARCHAR())]
+            # Return the feature columns and label columns as well.
+            return_types += [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
+                             for col in (feature_columns + label_columns)]
+        else:
+            # Return VARCHAR by default.
+            # VARCHAR is returned even for functions like `get_n_splits`, which can
+            # return large integer numbers like `4998813702034726525205100` for the
+            # `LeavePOut` class (when the argument `p` is 28 and the number of data rows
+            # is 100), as Vantage cannot fit such values into INTEGER.
+            return_types = [(func_name, VARCHAR())]
+
+        return_types = [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
+                        for col in new_partition_columns] + return_types
+
+        data_column_types_str, partition_indices_str, _, new_partition_columns = \
+            self._get_data_col_types_and_partition_col_indices_and_types(data, new_partition_columns)
+
+        py_exc = UtilFuncs._get_python_execution_path()
+        script_command = f"{py_exc} {script_file_path} {func_name} {len(feature_columns)} "\
+                         f"{len(label_columns)} {len(group_columns)} {partition_indices_str} {data_column_types_str} "\
+                         f"{self._model_file_name_prefix} {self._is_lake_system}"
+
+        # Get unique values in the partitioning columns.
+        self._fit_partition_unique_values = data.drop_duplicate(new_partition_columns).get_values()
+
+        self._install_initial_model_file()
+
+        opt = self._run_script(data, script_command, new_partition_columns, return_types)
+
+        if func_name == "get_n_splits" and not partition_columns:
+            # Return the number of splits as a value, not as a dataframe.
+            vals = execute_sql("select {} from {}".format(func_name, opt._table_name))
+            opt = vals.fetchall()[0][0]
+
+            # VARCHAR is returned by the script. Convert it to int.
+            return int(opt)
+
+        return opt
+
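+    # Editor's note (illustrative): without partition columns, get_n_splits() collapses
+    # the script output to a plain Python int, e.g. (hypothetical column names)
+    #   td_sklearn.LeaveOneOut().get_n_splits(X=df.select(["f1", "f2"]))  # -> row count
+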
+    def _get_returning_df(self, script_df, partition_column, returns):
+        """
+        Internal function to return the teradataml DataFrame, excluding the
+        partition_column.
+        """
+        if self._is_default_partition_value_fit:
+            # For the single model case, the partition column is internally generated
+            # and there is no point in returning it to the user.
+
+            # Extract columns from the return types.
+            returning_cols = [col[0] for col in returns[len(partition_column):]]
+            return script_df.select(returning_cols)
+        return script_df
+
+
+class _SKLearnFunctionWrapper(_GenericObjectWrapper):
+    def __init__(self, module_name, func_name):
+        super().__init__()
+        self.__module_name = module_name
+        self.__func_name = func_name
+        self.__params = None
+        self.__data_args = OrderedDict()
+        self._model_file_name = _generate_new_name(type="file_function", extension="py")
+
+    def __call__(self, **kwargs):
+        """
+        Run the function with all the arguments passed from the
+        `td_sklearn.<function_name>` function.
+        """
+        __data_columns = []
+
+        partition_cols = self._get_columns_as_list(kwargs.get("partition_columns", None))
+        if partition_cols:
+            kwargs.pop("partition_columns")
+
+        # Separate dataframe-related arguments and their column names from the actual kwargs.
+        for k, v in kwargs.items():
+            if isinstance(v, DataFrame):
+                # All dataframes should be a select of the parent dataframe.
+                _validate_df_query_type(v, "select", k)
+
+                # Save all columns of dataframe-related arguments.
+                __data_columns.extend(v.columns)
+
+                self.__data_args[k] = v
+
+        # Get the common parent dataframe from all dataframes.
+        self.__tdml_df = DataFrameUtils()._get_common_parent_df_from_dataframes(list(self.__data_args.values()))
+
+        self._validate_existence_of_partition_columns(partition_cols, self.__tdml_df.columns)
+
+        self.__tdml_df = self.__tdml_df.select(__data_columns + partition_cols)
+
+        self.__tdml_df, partition_cols = self._get_data_and_data_partition_columns(self.__tdml_df,
+                                                                                   __data_columns,
+                                                                                   [],
+                                                                                   partition_cols)
+
+        # Prepare a string of data arguments with the name, the indices where the
+        # columns of that argument reside, and the types of each of the columns.
+        data_args_str = self._prepare_data_args_string(kwargs)
+
+        self.__params = kwargs
+
+        # Get indices of partition_columns and types of all columns.
+        data_column_types_str, partition_indices_str, _, partition_cols = \
+            self._get_data_col_types_and_partition_col_indices_and_types(self.__tdml_df, partition_cols)
+
+        script_file_path = f"{self._model_file_name}" if self._is_lake_system \
+            else f"./{self._db_name}/{self._model_file_name}"
+        py_exc = UtilFuncs._get_python_execution_path()
+        script_command = f"{py_exc} {script_file_path} {partition_indices_str} {data_column_types_str} {data_args_str}"
+
+        return_types = [(col, self.__tdml_df._td_column_names_and_sqlalchemy_types[col.lower()])
+                        for col in partition_cols] + [(self.__func_name, CLOB())]
+
+        # Generate a new file in the .teradataml directory and install it in Vantage.
+        self._prepare_and_install_file()
+
+        self._model_data = self._run_script(self.__tdml_df, script_command, partition_cols, return_types)
+        self._model_data._index_label = None
+
+        fit_partition_unique_values = self.__tdml_df.drop_duplicate(partition_cols).get_values()
+
+        self.extract_sklearn_obj(n_unique_partitions=len(fit_partition_unique_values),
+                                 n_partition_cols=len(partition_cols))
+
+        # File cleanup after processing.
+        os.remove(self._model_file_local)
+        remove_file(file_identifier=self._model_file_name.split(".")[0], suppress_output=True,
+                    force_remove=True)
+
+        return self.modelObj
+
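+    # Editor's note (illustrative): this wrapper backs module-level sklearn functions,
+    # e.g. (hypothetical columns)
+    #   cm = td_sklearn.confusion_matrix(y_true=df.select(["y"]), y_pred=df.select(["pred"]))
+    #   # Both DataFrame arguments are pushed into one script run; the unpickled
+    #   # sklearn result (here a numpy array) is returned as modelObj.
+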
+    def _prepare_data_args_string(self, kwargs):
+        """
+        Get the column indices and types of each data-related argument in the format:
+        "{<arg_name>-<comma separated indices>-<comma separated types>}--
+         {<arg_name>-<comma separated indices>-<comma separated types>}"
+        """
+        data_args_str = []
+        for arg_name in list(self.__data_args.keys()):
+            # Remove DataFrame arguments from kwargs, which will be passed to Script.
+            kwargs.pop(arg_name)
+
+            # Get column indices and their types for each dataframe from the parent dataframe.
+            _, partition_indices_str, partition_types_str, _ = \
+                self._get_data_col_types_and_partition_col_indices_and_types(self.__tdml_df,
+                                                                             self.__data_args[arg_name].columns,
+                                                                             idx_delim=",",
+                                                                             types_delim=",")
+
+            # Format: "<arg_name>-<comma separated indices>-<comma separated types>"
+            data_args_str.append(f"{arg_name}-{partition_indices_str}-{partition_types_str}")
+
+        # Format: "{<arg_name>-<comma separated indices>-<comma separated types>}--
+        #          {<arg_name>-<comma separated indices>-<comma separated types>}"
+        return "--".join(data_args_str)
+
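+    # Editor's note (illustrative): for data args y_true (column index 0) and y_pred
+    # (column index 1), both integer-typed, the prepared string would be roughly
+    #   "y_true-0-int--y_pred-1-int"
+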
+    def _validate_existence_of_partition_columns(self, partition_columns, all_columns):
+        """
+        Validate whether the columns in the "partition_columns" argument are present in
+        any of the given dataframes.
+        """
+        invalid_part_cols = [c for c in partition_columns if c not in all_columns]
+
+        if invalid_part_cols:
+            raise ValueError(Messages.get_message(MessageCodes.INVALID_PARTITIONING_COLS,
+                                                  ", ".join(invalid_part_cols),
+                                                  "', '".join(list(self.__data_args.keys()))))
+
+    def _prepare_and_install_file(self):
+        """
+        Prepare the function script file from the template file and install it in Vantage.
+        """
+        with open(os.path.join(self._scripts_path, "sklearn_function.template")) as fp:
+            script_data = fp.read()
+        script_data = script_data.replace("<module_name>", self.__module_name)\
+            .replace("<func_name>", self.__func_name)\
+            .replace("<params>", json.dumps(self.__params))
+
+        self._model_file_local = os.path.join(self._tdml_tmp_dir, self._model_file_name)
+
+        with open(self._model_file_local, "w") as fp:
+            fp.write(script_data)
+
+        self._install_script_file(file_identifier=self._model_file_name.split(".")[0],
+                                  file_name=self._model_file_name,
+                                  file_location=self._tdml_tmp_dir)
+