teradataml 17.20.0.7__py3-none-any.whl → 20.0.0.0__py3-none-any.whl

This diff compares the contents of two publicly released package versions as they appear in their respective public registries. It is provided for informational purposes only.

Potentially problematic release: this version of teradataml might be problematic.

Files changed (1285)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +1864 -1640
  4. teradataml/__init__.py +70 -60
  5. teradataml/_version.py +11 -11
  6. teradataml/analytics/Transformations.py +2995 -2995
  7. teradataml/analytics/__init__.py +81 -83
  8. teradataml/analytics/analytic_function_executor.py +2013 -2010
  9. teradataml/analytics/analytic_query_generator.py +958 -958
  10. teradataml/analytics/byom/H2OPredict.py +514 -514
  11. teradataml/analytics/byom/PMMLPredict.py +437 -437
  12. teradataml/analytics/byom/__init__.py +14 -14
  13. teradataml/analytics/json_parser/__init__.py +130 -130
  14. teradataml/analytics/json_parser/analytic_functions_argument.py +1707 -1707
  15. teradataml/analytics/json_parser/json_store.py +191 -191
  16. teradataml/analytics/json_parser/metadata.py +1637 -1637
  17. teradataml/analytics/json_parser/utils.py +804 -803
  18. teradataml/analytics/meta_class.py +196 -196
  19. teradataml/analytics/sqle/DecisionTreePredict.py +455 -470
  20. teradataml/analytics/sqle/NaiveBayesPredict.py +419 -428
  21. teradataml/analytics/sqle/__init__.py +97 -110
  22. teradataml/analytics/sqle/json/decisiontreepredict_sqle.json +78 -78
  23. teradataml/analytics/sqle/json/naivebayespredict_sqle.json +62 -62
  24. teradataml/analytics/table_operator/__init__.py +10 -10
  25. teradataml/analytics/uaf/__init__.py +63 -63
  26. teradataml/analytics/utils.py +693 -692
  27. teradataml/analytics/valib.py +1603 -1600
  28. teradataml/automl/__init__.py +1628 -0
  29. teradataml/automl/custom_json_utils.py +1270 -0
  30. teradataml/automl/data_preparation.py +993 -0
  31. teradataml/automl/data_transformation.py +727 -0
  32. teradataml/automl/feature_engineering.py +1648 -0
  33. teradataml/automl/feature_exploration.py +547 -0
  34. teradataml/automl/model_evaluation.py +163 -0
  35. teradataml/automl/model_training.py +887 -0
  36. teradataml/catalog/__init__.py +1 -3
  37. teradataml/catalog/byom.py +1759 -1716
  38. teradataml/catalog/function_argument_mapper.py +859 -861
  39. teradataml/catalog/model_cataloging_utils.py +491 -1510
  40. teradataml/clients/pkce_client.py +481 -481
  41. teradataml/common/aed_utils.py +6 -2
  42. teradataml/common/bulk_exposed_utils.py +111 -111
  43. teradataml/common/constants.py +1433 -1441
  44. teradataml/common/deprecations.py +160 -0
  45. teradataml/common/exceptions.py +73 -73
  46. teradataml/common/formula.py +742 -742
  47. teradataml/common/garbagecollector.py +592 -635
  48. teradataml/common/messagecodes.py +422 -431
  49. teradataml/common/messages.py +227 -231
  50. teradataml/common/sqlbundle.py +693 -693
  51. teradataml/common/td_coltype_code_to_tdtype.py +48 -48
  52. teradataml/common/utils.py +2418 -2500
  53. teradataml/common/warnings.py +25 -25
  54. teradataml/common/wrapper_utils.py +1 -110
  55. teradataml/config/dummy_file1.cfg +4 -4
  56. teradataml/config/dummy_file2.cfg +2 -2
  57. teradataml/config/sqlengine_alias_definitions_v1.0 +13 -13
  58. teradataml/config/sqlengine_alias_definitions_v1.1 +19 -19
  59. teradataml/config/sqlengine_alias_definitions_v1.3 +18 -18
  60. teradataml/context/aed_context.py +217 -217
  61. teradataml/context/context.py +1071 -999
  62. teradataml/data/A_loan.csv +19 -19
  63. teradataml/data/BINARY_REALS_LEFT.csv +11 -11
  64. teradataml/data/BINARY_REALS_RIGHT.csv +11 -11
  65. teradataml/data/B_loan.csv +49 -49
  66. teradataml/data/BuoyData2.csv +17 -17
  67. teradataml/data/CONVOLVE2_COMPLEX_LEFT.csv +5 -5
  68. teradataml/data/CONVOLVE2_COMPLEX_RIGHT.csv +5 -5
  69. teradataml/data/Convolve2RealsLeft.csv +5 -5
  70. teradataml/data/Convolve2RealsRight.csv +5 -5
  71. teradataml/data/Convolve2ValidLeft.csv +11 -11
  72. teradataml/data/Convolve2ValidRight.csv +11 -11
  73. teradataml/data/DFFTConv_Real_8_8.csv +65 -65
  74. teradataml/data/Orders1_12mf.csv +24 -24
  75. teradataml/data/Pi_loan.csv +7 -7
  76. teradataml/data/SMOOTHED_DATA.csv +7 -7
  77. teradataml/data/TestDFFT8.csv +9 -9
  78. teradataml/data/TestRiver.csv +109 -109
  79. teradataml/data/Traindata.csv +28 -28
  80. teradataml/data/acf.csv +17 -17
  81. teradataml/data/adaboost_example.json +34 -34
  82. teradataml/data/adaboostpredict_example.json +24 -24
  83. teradataml/data/additional_table.csv +10 -10
  84. teradataml/data/admissions_test.csv +21 -21
  85. teradataml/data/admissions_train.csv +41 -41
  86. teradataml/data/admissions_train_nulls.csv +41 -41
  87. teradataml/data/ageandheight.csv +13 -13
  88. teradataml/data/ageandpressure.csv +31 -31
  89. teradataml/data/antiselect_example.json +36 -36
  90. teradataml/data/antiselect_input.csv +8 -8
  91. teradataml/data/antiselect_input_mixed_case.csv +8 -8
  92. teradataml/data/applicant_external.csv +6 -6
  93. teradataml/data/applicant_reference.csv +6 -6
  94. teradataml/data/arima_example.json +9 -9
  95. teradataml/data/assortedtext_input.csv +8 -8
  96. teradataml/data/attribution_example.json +33 -33
  97. teradataml/data/attribution_sample_table.csv +27 -27
  98. teradataml/data/attribution_sample_table1.csv +6 -6
  99. teradataml/data/attribution_sample_table2.csv +11 -11
  100. teradataml/data/bank_churn.csv +10001 -0
  101. teradataml/data/bank_web_clicks1.csv +42 -42
  102. teradataml/data/bank_web_clicks2.csv +91 -91
  103. teradataml/data/bank_web_url.csv +85 -85
  104. teradataml/data/barrier.csv +2 -2
  105. teradataml/data/barrier_new.csv +3 -3
  106. teradataml/data/betweenness_example.json +13 -13
  107. teradataml/data/bin_breaks.csv +8 -8
  108. teradataml/data/bin_fit_ip.csv +3 -3
  109. teradataml/data/binary_complex_left.csv +11 -11
  110. teradataml/data/binary_complex_right.csv +11 -11
  111. teradataml/data/binary_matrix_complex_left.csv +21 -21
  112. teradataml/data/binary_matrix_complex_right.csv +21 -21
  113. teradataml/data/binary_matrix_real_left.csv +21 -21
  114. teradataml/data/binary_matrix_real_right.csv +21 -21
  115. teradataml/data/blood2ageandweight.csv +26 -26
  116. teradataml/data/bmi.csv +501 -0
  117. teradataml/data/boston.csv +507 -507
  118. teradataml/data/buoydata_mix.csv +11 -11
  119. teradataml/data/burst_data.csv +5 -5
  120. teradataml/data/burst_example.json +20 -20
  121. teradataml/data/byom_example.json +17 -17
  122. teradataml/data/bytes_table.csv +3 -3
  123. teradataml/data/cal_housing_ex_raw.csv +70 -70
  124. teradataml/data/callers.csv +7 -7
  125. teradataml/data/calls.csv +10 -10
  126. teradataml/data/cars_hist.csv +33 -33
  127. teradataml/data/cat_table.csv +24 -24
  128. teradataml/data/ccm_example.json +31 -31
  129. teradataml/data/ccm_input.csv +91 -91
  130. teradataml/data/ccm_input2.csv +13 -13
  131. teradataml/data/ccmexample.csv +101 -101
  132. teradataml/data/ccmprepare_example.json +8 -8
  133. teradataml/data/ccmprepare_input.csv +91 -91
  134. teradataml/data/cfilter_example.json +12 -12
  135. teradataml/data/changepointdetection_example.json +18 -18
  136. teradataml/data/changepointdetectionrt_example.json +8 -8
  137. teradataml/data/chi_sq.csv +2 -2
  138. teradataml/data/churn_data.csv +14 -14
  139. teradataml/data/churn_emission.csv +35 -35
  140. teradataml/data/churn_initial.csv +3 -3
  141. teradataml/data/churn_state_transition.csv +5 -5
  142. teradataml/data/citedges_2.csv +745 -745
  143. teradataml/data/citvertices_2.csv +1210 -1210
  144. teradataml/data/clicks2.csv +16 -16
  145. teradataml/data/clickstream.csv +12 -12
  146. teradataml/data/clickstream1.csv +11 -11
  147. teradataml/data/closeness_example.json +15 -15
  148. teradataml/data/complaints.csv +21 -21
  149. teradataml/data/complaints_mini.csv +3 -3
  150. teradataml/data/complaints_testtoken.csv +224 -224
  151. teradataml/data/complaints_tokens_test.csv +353 -353
  152. teradataml/data/complaints_traintoken.csv +472 -472
  153. teradataml/data/computers_category.csv +1001 -1001
  154. teradataml/data/computers_test1.csv +1252 -1252
  155. teradataml/data/computers_train1.csv +5009 -5009
  156. teradataml/data/computers_train1_clustered.csv +5009 -5009
  157. teradataml/data/confusionmatrix_example.json +9 -9
  158. teradataml/data/conversion_event_table.csv +3 -3
  159. teradataml/data/corr_input.csv +17 -17
  160. teradataml/data/correlation_example.json +11 -11
  161. teradataml/data/coxhazardratio_example.json +39 -39
  162. teradataml/data/coxph_example.json +15 -15
  163. teradataml/data/coxsurvival_example.json +28 -28
  164. teradataml/data/cpt.csv +41 -41
  165. teradataml/data/credit_ex_merged.csv +45 -45
  166. teradataml/data/customer_loyalty.csv +301 -301
  167. teradataml/data/customer_loyalty_newseq.csv +31 -31
  168. teradataml/data/dataframe_example.json +146 -146
  169. teradataml/data/decisionforest_example.json +37 -37
  170. teradataml/data/decisionforestpredict_example.json +38 -38
  171. teradataml/data/decisiontree_example.json +21 -21
  172. teradataml/data/decisiontreepredict_example.json +45 -45
  173. teradataml/data/dfft2_size4_real.csv +17 -17
  174. teradataml/data/dfft2_test_matrix16.csv +17 -17
  175. teradataml/data/dfft2conv_real_4_4.csv +65 -65
  176. teradataml/data/diabetes.csv +443 -443
  177. teradataml/data/diabetes_test.csv +89 -89
  178. teradataml/data/dict_table.csv +5 -5
  179. teradataml/data/docperterm_table.csv +4 -4
  180. teradataml/data/docs/__init__.py +1 -1
  181. teradataml/data/docs/byom/docs/DataRobotPredict.py +180 -180
  182. teradataml/data/docs/byom/docs/DataikuPredict.py +177 -177
  183. teradataml/data/docs/byom/docs/H2OPredict.py +324 -324
  184. teradataml/data/docs/byom/docs/ONNXPredict.py +283 -283
  185. teradataml/data/docs/byom/docs/PMMLPredict.py +277 -277
  186. teradataml/data/docs/sqle/docs_17_10/Antiselect.py +82 -82
  187. teradataml/data/docs/sqle/docs_17_10/Attribution.py +199 -199
  188. teradataml/data/docs/sqle/docs_17_10/BincodeFit.py +171 -171
  189. teradataml/data/docs/sqle/docs_17_10/BincodeTransform.py +131 -130
  190. teradataml/data/docs/sqle/docs_17_10/CategoricalSummary.py +86 -86
  191. teradataml/data/docs/sqle/docs_17_10/ChiSq.py +90 -90
  192. teradataml/data/docs/sqle/docs_17_10/ColumnSummary.py +85 -85
  193. teradataml/data/docs/sqle/docs_17_10/ConvertTo.py +95 -95
  194. teradataml/data/docs/sqle/docs_17_10/DecisionForestPredict.py +139 -139
  195. teradataml/data/docs/sqle/docs_17_10/DecisionTreePredict.py +151 -151
  196. teradataml/data/docs/sqle/docs_17_10/FTest.py +160 -160
  197. teradataml/data/docs/sqle/docs_17_10/FillRowId.py +82 -82
  198. teradataml/data/docs/sqle/docs_17_10/Fit.py +87 -87
  199. teradataml/data/docs/sqle/docs_17_10/GLMPredict.py +144 -144
  200. teradataml/data/docs/sqle/docs_17_10/GetRowsWithMissingValues.py +84 -84
  201. teradataml/data/docs/sqle/docs_17_10/GetRowsWithoutMissingValues.py +81 -81
  202. teradataml/data/docs/sqle/docs_17_10/Histogram.py +164 -164
  203. teradataml/data/docs/sqle/docs_17_10/MovingAverage.py +134 -134
  204. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +208 -208
  205. teradataml/data/docs/sqle/docs_17_10/NPath.py +265 -265
  206. teradataml/data/docs/sqle/docs_17_10/NaiveBayesPredict.py +116 -116
  207. teradataml/data/docs/sqle/docs_17_10/NaiveBayesTextClassifierPredict.py +176 -176
  208. teradataml/data/docs/sqle/docs_17_10/NumApply.py +147 -147
  209. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +132 -132
  210. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +103 -103
  211. teradataml/data/docs/sqle/docs_17_10/OutlierFilterFit.py +165 -165
  212. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +101 -101
  213. teradataml/data/docs/sqle/docs_17_10/Pack.py +128 -128
  214. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesFit.py +111 -111
  215. teradataml/data/docs/sqle/docs_17_10/PolynomialFeaturesTransform.py +102 -102
  216. teradataml/data/docs/sqle/docs_17_10/QQNorm.py +104 -104
  217. teradataml/data/docs/sqle/docs_17_10/RoundColumns.py +109 -109
  218. teradataml/data/docs/sqle/docs_17_10/RowNormalizeFit.py +117 -117
  219. teradataml/data/docs/sqle/docs_17_10/RowNormalizeTransform.py +99 -98
  220. teradataml/data/docs/sqle/docs_17_10/SVMSparsePredict.py +152 -152
  221. teradataml/data/docs/sqle/docs_17_10/ScaleFit.py +197 -197
  222. teradataml/data/docs/sqle/docs_17_10/ScaleTransform.py +99 -98
  223. teradataml/data/docs/sqle/docs_17_10/Sessionize.py +113 -113
  224. teradataml/data/docs/sqle/docs_17_10/SimpleImputeFit.py +116 -116
  225. teradataml/data/docs/sqle/docs_17_10/SimpleImputeTransform.py +98 -98
  226. teradataml/data/docs/sqle/docs_17_10/StrApply.py +187 -187
  227. teradataml/data/docs/sqle/docs_17_10/StringSimilarity.py +145 -145
  228. teradataml/data/docs/sqle/docs_17_10/Transform.py +105 -104
  229. teradataml/data/docs/sqle/docs_17_10/UnivariateStatistics.py +141 -141
  230. teradataml/data/docs/sqle/docs_17_10/Unpack.py +214 -214
  231. teradataml/data/docs/sqle/docs_17_10/WhichMax.py +83 -83
  232. teradataml/data/docs/sqle/docs_17_10/WhichMin.py +83 -83
  233. teradataml/data/docs/sqle/docs_17_10/ZTest.py +155 -155
  234. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +126 -126
  235. teradataml/data/docs/sqle/docs_17_20/Antiselect.py +82 -82
  236. teradataml/data/docs/sqle/docs_17_20/Attribution.py +200 -200
  237. teradataml/data/docs/sqle/docs_17_20/BincodeFit.py +171 -171
  238. teradataml/data/docs/sqle/docs_17_20/BincodeTransform.py +139 -138
  239. teradataml/data/docs/sqle/docs_17_20/CategoricalSummary.py +86 -86
  240. teradataml/data/docs/sqle/docs_17_20/ChiSq.py +90 -90
  241. teradataml/data/docs/sqle/docs_17_20/ClassificationEvaluator.py +166 -166
  242. teradataml/data/docs/sqle/docs_17_20/ColumnSummary.py +85 -85
  243. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +243 -243
  244. teradataml/data/docs/sqle/docs_17_20/ConvertTo.py +113 -113
  245. teradataml/data/docs/sqle/docs_17_20/DecisionForest.py +279 -279
  246. teradataml/data/docs/sqle/docs_17_20/DecisionForestPredict.py +144 -144
  247. teradataml/data/docs/sqle/docs_17_20/DecisionTreePredict.py +135 -135
  248. teradataml/data/docs/sqle/docs_17_20/FTest.py +160 -160
  249. teradataml/data/docs/sqle/docs_17_20/FillRowId.py +82 -82
  250. teradataml/data/docs/sqle/docs_17_20/Fit.py +87 -87
  251. teradataml/data/docs/sqle/docs_17_20/GLM.py +380 -380
  252. teradataml/data/docs/sqle/docs_17_20/GLMPerSegment.py +414 -414
  253. teradataml/data/docs/sqle/docs_17_20/GLMPredict.py +144 -144
  254. teradataml/data/docs/sqle/docs_17_20/GLMPredictPerSegment.py +233 -234
  255. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +123 -123
  256. teradataml/data/docs/sqle/docs_17_20/GetRowsWithMissingValues.py +108 -108
  257. teradataml/data/docs/sqle/docs_17_20/GetRowsWithoutMissingValues.py +105 -105
  258. teradataml/data/docs/sqle/docs_17_20/Histogram.py +223 -223
  259. teradataml/data/docs/sqle/docs_17_20/KMeans.py +204 -204
  260. teradataml/data/docs/sqle/docs_17_20/KMeansPredict.py +144 -143
  261. teradataml/data/docs/sqle/docs_17_20/KNN.py +214 -214
  262. teradataml/data/docs/sqle/docs_17_20/MovingAverage.py +134 -134
  263. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +208 -208
  264. teradataml/data/docs/sqle/docs_17_20/NPath.py +265 -265
  265. teradataml/data/docs/sqle/docs_17_20/NaiveBayesPredict.py +116 -116
  266. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierPredict.py +177 -176
  267. teradataml/data/docs/sqle/docs_17_20/NaiveBayesTextClassifierTrainer.py +126 -126
  268. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +117 -117
  269. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineTransform.py +112 -112
  270. teradataml/data/docs/sqle/docs_17_20/NumApply.py +147 -147
  271. teradataml/data/docs/sqle/docs_17_20/OneClassSVM.py +307 -307
  272. teradataml/data/docs/sqle/docs_17_20/OneClassSVMPredict.py +185 -184
  273. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +225 -225
  274. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +115 -115
  275. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingFit.py +219 -219
  276. teradataml/data/docs/sqle/docs_17_20/OrdinalEncodingTransform.py +127 -127
  277. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +189 -189
  278. teradataml/data/docs/sqle/docs_17_20/OutlierFilterTransform.py +117 -112
  279. teradataml/data/docs/sqle/docs_17_20/Pack.py +128 -128
  280. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesFit.py +111 -111
  281. teradataml/data/docs/sqle/docs_17_20/PolynomialFeaturesTransform.py +112 -111
  282. teradataml/data/docs/sqle/docs_17_20/QQNorm.py +104 -104
  283. teradataml/data/docs/sqle/docs_17_20/ROC.py +163 -163
  284. teradataml/data/docs/sqle/docs_17_20/RandomProjectionFit.py +154 -154
  285. teradataml/data/docs/sqle/docs_17_20/RandomProjectionMinComponents.py +106 -106
  286. teradataml/data/docs/sqle/docs_17_20/RandomProjectionTransform.py +120 -120
  287. teradataml/data/docs/sqle/docs_17_20/RegressionEvaluator.py +211 -211
  288. teradataml/data/docs/sqle/docs_17_20/RoundColumns.py +108 -108
  289. teradataml/data/docs/sqle/docs_17_20/RowNormalizeFit.py +117 -117
  290. teradataml/data/docs/sqle/docs_17_20/RowNormalizeTransform.py +111 -110
  291. teradataml/data/docs/sqle/docs_17_20/SVM.py +413 -413
  292. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +202 -202
  293. teradataml/data/docs/sqle/docs_17_20/SVMSparsePredict.py +152 -152
  294. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +197 -197
  295. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +110 -109
  296. teradataml/data/docs/sqle/docs_17_20/SentimentExtractor.py +206 -206
  297. teradataml/data/docs/sqle/docs_17_20/Sessionize.py +113 -113
  298. teradataml/data/docs/sqle/docs_17_20/Silhouette.py +152 -152
  299. teradataml/data/docs/sqle/docs_17_20/SimpleImputeFit.py +116 -116
  300. teradataml/data/docs/sqle/docs_17_20/SimpleImputeTransform.py +109 -108
  301. teradataml/data/docs/sqle/docs_17_20/StrApply.py +187 -187
  302. teradataml/data/docs/sqle/docs_17_20/StringSimilarity.py +145 -145
  303. teradataml/data/docs/sqle/docs_17_20/TDDecisionForestPredict.py +207 -207
  304. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +171 -171
  305. teradataml/data/docs/sqle/docs_17_20/TargetEncodingFit.py +266 -266
  306. teradataml/data/docs/sqle/docs_17_20/TargetEncodingTransform.py +141 -140
  307. teradataml/data/docs/sqle/docs_17_20/TextParser.py +172 -172
  308. teradataml/data/docs/sqle/docs_17_20/TrainTestSplit.py +159 -159
  309. teradataml/data/docs/sqle/docs_17_20/Transform.py +123 -123
  310. teradataml/data/docs/sqle/docs_17_20/UnivariateStatistics.py +141 -141
  311. teradataml/data/docs/sqle/docs_17_20/Unpack.py +214 -214
  312. teradataml/data/docs/sqle/docs_17_20/VectorDistance.py +168 -168
  313. teradataml/data/docs/sqle/docs_17_20/WhichMax.py +83 -83
  314. teradataml/data/docs/sqle/docs_17_20/WhichMin.py +83 -83
  315. teradataml/data/docs/sqle/docs_17_20/WordEmbeddings.py +236 -236
  316. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +353 -353
  317. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +275 -275
  318. teradataml/data/docs/sqle/docs_17_20/ZTest.py +155 -155
  319. teradataml/data/docs/tableoperator/docs_17_00/ReadNOS.py +429 -429
  320. teradataml/data/docs/tableoperator/docs_17_05/ReadNOS.py +429 -429
  321. teradataml/data/docs/tableoperator/docs_17_05/WriteNOS.py +347 -347
  322. teradataml/data/docs/tableoperator/docs_17_10/ReadNOS.py +428 -428
  323. teradataml/data/docs/tableoperator/docs_17_10/WriteNOS.py +347 -347
  324. teradataml/data/docs/tableoperator/docs_17_20/ReadNOS.py +439 -439
  325. teradataml/data/docs/tableoperator/docs_17_20/WriteNOS.py +386 -386
  326. teradataml/data/docs/uaf/docs_17_20/ACF.py +195 -195
  327. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +369 -369
  328. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +142 -142
  329. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +159 -159
  330. teradataml/data/docs/uaf/docs_17_20/BinaryMatrixOp.py +247 -247
  331. teradataml/data/docs/uaf/docs_17_20/BinarySeriesOp.py +252 -252
  332. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +177 -177
  333. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +174 -174
  334. teradataml/data/docs/uaf/docs_17_20/Convolve.py +226 -226
  335. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +214 -214
  336. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +183 -183
  337. teradataml/data/docs/uaf/docs_17_20/DFFT.py +203 -203
  338. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +216 -216
  339. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +215 -215
  340. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +191 -191
  341. teradataml/data/docs/uaf/docs_17_20/DTW.py +179 -179
  342. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +144 -144
  343. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +183 -183
  344. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +184 -184
  345. teradataml/data/docs/uaf/docs_17_20/FitMetrics.py +172 -172
  346. teradataml/data/docs/uaf/docs_17_20/GenseriesFormula.py +205 -205
  347. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +142 -142
  348. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +258 -258
  349. teradataml/data/docs/uaf/docs_17_20/IDFFT.py +164 -164
  350. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +198 -198
  351. teradataml/data/docs/uaf/docs_17_20/InputValidator.py +120 -120
  352. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +155 -155
  353. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +214 -214
  354. teradataml/data/docs/uaf/docs_17_20/MAMean.py +173 -173
  355. teradataml/data/docs/uaf/docs_17_20/MInfo.py +133 -133
  356. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +135 -135
  357. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +190 -190
  358. teradataml/data/docs/uaf/docs_17_20/PACF.py +158 -158
  359. teradataml/data/docs/uaf/docs_17_20/Portman.py +216 -216
  360. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +154 -154
  361. teradataml/data/docs/uaf/docs_17_20/Resample.py +228 -228
  362. teradataml/data/docs/uaf/docs_17_20/SInfo.py +122 -122
  363. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +165 -165
  364. teradataml/data/docs/uaf/docs_17_20/SelectionCriteria.py +173 -173
  365. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +170 -170
  366. teradataml/data/docs/uaf/docs_17_20/SignifResidmean.py +163 -163
  367. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +179 -179
  368. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +207 -207
  369. teradataml/data/docs/uaf/docs_17_20/TrackingOp.py +150 -150
  370. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +171 -171
  371. teradataml/data/docs/uaf/docs_17_20/Unnormalize.py +201 -201
  372. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +169 -169
  373. teradataml/data/dtw_example.json +17 -17
  374. teradataml/data/dtw_t1.csv +11 -11
  375. teradataml/data/dtw_t2.csv +4 -4
  376. teradataml/data/dwt2d_example.json +15 -15
  377. teradataml/data/dwt_example.json +14 -14
  378. teradataml/data/dwt_filter_dim.csv +5 -5
  379. teradataml/data/emission.csv +9 -9
  380. teradataml/data/emp_table_by_dept.csv +19 -19
  381. teradataml/data/employee_info.csv +4 -4
  382. teradataml/data/employee_table.csv +6 -6
  383. teradataml/data/excluding_event_table.csv +2 -2
  384. teradataml/data/finance_data.csv +6 -6
  385. teradataml/data/finance_data2.csv +61 -61
  386. teradataml/data/finance_data3.csv +93 -93
  387. teradataml/data/fish.csv +160 -0
  388. teradataml/data/fm_blood2ageandweight.csv +26 -26
  389. teradataml/data/fmeasure_example.json +11 -11
  390. teradataml/data/followers_leaders.csv +10 -10
  391. teradataml/data/fpgrowth_example.json +12 -12
  392. teradataml/data/frequentpaths_example.json +29 -29
  393. teradataml/data/friends.csv +9 -9
  394. teradataml/data/fs_input.csv +33 -33
  395. teradataml/data/fs_input1.csv +33 -33
  396. teradataml/data/genData.csv +513 -513
  397. teradataml/data/geodataframe_example.json +39 -39
  398. teradataml/data/glass_types.csv +215 -0
  399. teradataml/data/glm_admissions_model.csv +12 -12
  400. teradataml/data/glm_example.json +29 -29
  401. teradataml/data/glml1l2_example.json +28 -28
  402. teradataml/data/glml1l2predict_example.json +54 -54
  403. teradataml/data/glmpredict_example.json +54 -54
  404. teradataml/data/gq_t1.csv +21 -21
  405. teradataml/data/hconvolve_complex_right.csv +5 -5
  406. teradataml/data/hconvolve_complex_rightmulti.csv +5 -5
  407. teradataml/data/histogram_example.json +11 -11
  408. teradataml/data/hmmdecoder_example.json +78 -78
  409. teradataml/data/hmmevaluator_example.json +24 -24
  410. teradataml/data/hmmsupervised_example.json +10 -10
  411. teradataml/data/hmmunsupervised_example.json +7 -7
  412. teradataml/data/house_values.csv +12 -12
  413. teradataml/data/house_values2.csv +13 -13
  414. teradataml/data/housing_cat.csv +7 -7
  415. teradataml/data/housing_data.csv +9 -9
  416. teradataml/data/housing_test.csv +47 -47
  417. teradataml/data/housing_test_binary.csv +47 -47
  418. teradataml/data/housing_train.csv +493 -493
  419. teradataml/data/housing_train_attribute.csv +4 -4
  420. teradataml/data/housing_train_binary.csv +437 -437
  421. teradataml/data/housing_train_parameter.csv +2 -2
  422. teradataml/data/housing_train_response.csv +493 -493
  423. teradataml/data/ibm_stock.csv +370 -370
  424. teradataml/data/ibm_stock1.csv +370 -370
  425. teradataml/data/identitymatch_example.json +21 -21
  426. teradataml/data/idf_table.csv +4 -4
  427. teradataml/data/impressions.csv +101 -101
  428. teradataml/data/inflation.csv +21 -21
  429. teradataml/data/initial.csv +3 -3
  430. teradataml/data/insect_sprays.csv +12 -12
  431. teradataml/data/insurance.csv +1339 -1339
  432. teradataml/data/interpolator_example.json +12 -12
  433. teradataml/data/iris_altinput.csv +481 -481
  434. teradataml/data/iris_attribute_output.csv +8 -8
  435. teradataml/data/iris_attribute_test.csv +121 -121
  436. teradataml/data/iris_attribute_train.csv +481 -481
  437. teradataml/data/iris_category_expect_predict.csv +31 -31
  438. teradataml/data/iris_data.csv +151 -0
  439. teradataml/data/iris_input.csv +151 -151
  440. teradataml/data/iris_response_train.csv +121 -121
  441. teradataml/data/iris_test.csv +31 -31
  442. teradataml/data/iris_train.csv +121 -121
  443. teradataml/data/join_table1.csv +4 -4
  444. teradataml/data/join_table2.csv +4 -4
  445. teradataml/data/jsons/anly_function_name.json +6 -6
  446. teradataml/data/jsons/byom/dataikupredict.json +147 -147
  447. teradataml/data/jsons/byom/datarobotpredict.json +146 -146
  448. teradataml/data/jsons/byom/h2opredict.json +194 -194
  449. teradataml/data/jsons/byom/onnxpredict.json +186 -186
  450. teradataml/data/jsons/byom/pmmlpredict.json +146 -146
  451. teradataml/data/jsons/paired_functions.json +435 -435
  452. teradataml/data/jsons/sqle/16.20/Antiselect.json +56 -56
  453. teradataml/data/jsons/sqle/16.20/Attribution.json +249 -249
  454. teradataml/data/jsons/sqle/16.20/DecisionForestPredict.json +156 -156
  455. teradataml/data/jsons/sqle/16.20/DecisionTreePredict.json +170 -170
  456. teradataml/data/jsons/sqle/16.20/GLMPredict.json +122 -122
  457. teradataml/data/jsons/sqle/16.20/MovingAverage.json +367 -367
  458. teradataml/data/jsons/sqle/16.20/NGramSplitter.json +239 -239
  459. teradataml/data/jsons/sqle/16.20/NaiveBayesPredict.json +136 -136
  460. teradataml/data/jsons/sqle/16.20/NaiveBayesTextClassifierPredict.json +235 -235
  461. teradataml/data/jsons/sqle/16.20/Pack.json +98 -98
  462. teradataml/data/jsons/sqle/16.20/SVMSparsePredict.json +162 -162
  463. teradataml/data/jsons/sqle/16.20/Sessionize.json +105 -105
  464. teradataml/data/jsons/sqle/16.20/StringSimilarity.json +86 -86
  465. teradataml/data/jsons/sqle/16.20/Unpack.json +166 -166
  466. teradataml/data/jsons/sqle/16.20/nPath.json +269 -269
  467. teradataml/data/jsons/sqle/17.00/Antiselect.json +56 -56
  468. teradataml/data/jsons/sqle/17.00/Attribution.json +249 -249
  469. teradataml/data/jsons/sqle/17.00/DecisionForestPredict.json +156 -156
  470. teradataml/data/jsons/sqle/17.00/DecisionTreePredict.json +170 -170
  471. teradataml/data/jsons/sqle/17.00/GLMPredict.json +122 -122
  472. teradataml/data/jsons/sqle/17.00/MovingAverage.json +367 -367
  473. teradataml/data/jsons/sqle/17.00/NGramSplitter.json +239 -239
  474. teradataml/data/jsons/sqle/17.00/NaiveBayesPredict.json +136 -136
  475. teradataml/data/jsons/sqle/17.00/NaiveBayesTextClassifierPredict.json +235 -235
  476. teradataml/data/jsons/sqle/17.00/Pack.json +98 -98
  477. teradataml/data/jsons/sqle/17.00/SVMSparsePredict.json +162 -162
  478. teradataml/data/jsons/sqle/17.00/Sessionize.json +105 -105
  479. teradataml/data/jsons/sqle/17.00/StringSimilarity.json +86 -86
  480. teradataml/data/jsons/sqle/17.00/Unpack.json +166 -166
  481. teradataml/data/jsons/sqle/17.00/nPath.json +269 -269
  482. teradataml/data/jsons/sqle/17.05/Antiselect.json +56 -56
  483. teradataml/data/jsons/sqle/17.05/Attribution.json +249 -249
  484. teradataml/data/jsons/sqle/17.05/DecisionForestPredict.json +156 -156
  485. teradataml/data/jsons/sqle/17.05/DecisionTreePredict.json +170 -170
  486. teradataml/data/jsons/sqle/17.05/GLMPredict.json +122 -122
  487. teradataml/data/jsons/sqle/17.05/MovingAverage.json +367 -367
  488. teradataml/data/jsons/sqle/17.05/NGramSplitter.json +239 -239
  489. teradataml/data/jsons/sqle/17.05/NaiveBayesPredict.json +136 -136
  490. teradataml/data/jsons/sqle/17.05/NaiveBayesTextClassifierPredict.json +235 -235
  491. teradataml/data/jsons/sqle/17.05/Pack.json +98 -98
  492. teradataml/data/jsons/sqle/17.05/SVMSparsePredict.json +162 -162
  493. teradataml/data/jsons/sqle/17.05/Sessionize.json +105 -105
  494. teradataml/data/jsons/sqle/17.05/StringSimilarity.json +86 -86
  495. teradataml/data/jsons/sqle/17.05/Unpack.json +166 -166
  496. teradataml/data/jsons/sqle/17.05/nPath.json +269 -269
  497. teradataml/data/jsons/sqle/17.10/Antiselect.json +56 -56
  498. teradataml/data/jsons/sqle/17.10/Attribution.json +249 -249
  499. teradataml/data/jsons/sqle/17.10/DecisionForestPredict.json +185 -185
  500. teradataml/data/jsons/sqle/17.10/DecisionTreePredict.json +171 -171
  501. teradataml/data/jsons/sqle/17.10/GLMPredict.json +151 -151
  502. teradataml/data/jsons/sqle/17.10/MovingAverage.json +368 -368
  503. teradataml/data/jsons/sqle/17.10/NGramSplitter.json +239 -239
  504. teradataml/data/jsons/sqle/17.10/NaiveBayesPredict.json +149 -149
  505. teradataml/data/jsons/sqle/17.10/NaiveBayesTextClassifierPredict.json +288 -288
  506. teradataml/data/jsons/sqle/17.10/Pack.json +133 -133
  507. teradataml/data/jsons/sqle/17.10/SVMSparsePredict.json +193 -193
  508. teradataml/data/jsons/sqle/17.10/Sessionize.json +105 -105
  509. teradataml/data/jsons/sqle/17.10/StringSimilarity.json +86 -86
  510. teradataml/data/jsons/sqle/17.10/TD_BinCodeFit.json +239 -239
  511. teradataml/data/jsons/sqle/17.10/TD_BinCodeTransform.json +70 -70
  512. teradataml/data/jsons/sqle/17.10/TD_CategoricalSummary.json +53 -53
  513. teradataml/data/jsons/sqle/17.10/TD_Chisq.json +67 -67
  514. teradataml/data/jsons/sqle/17.10/TD_ColumnSummary.json +53 -53
  515. teradataml/data/jsons/sqle/17.10/TD_ConvertTo.json +68 -68
  516. teradataml/data/jsons/sqle/17.10/TD_FTest.json +187 -187
  517. teradataml/data/jsons/sqle/17.10/TD_FillRowID.json +51 -51
  518. teradataml/data/jsons/sqle/17.10/TD_FunctionFit.json +46 -46
  519. teradataml/data/jsons/sqle/17.10/TD_FunctionTransform.json +72 -71
  520. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithMissingValues.json +52 -52
  521. teradataml/data/jsons/sqle/17.10/TD_GetRowsWithoutMissingValues.json +52 -52
  522. teradataml/data/jsons/sqle/17.10/TD_Histogram.json +132 -132
  523. teradataml/data/jsons/sqle/17.10/TD_NumApply.json +147 -147
  524. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingFit.json +182 -182
  525. teradataml/data/jsons/sqle/17.10/TD_OneHotEncodingTransform.json +65 -64
  526. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterFit.json +196 -196
  527. teradataml/data/jsons/sqle/17.10/TD_OutlierFilterTransform.json +48 -47
  528. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesFit.json +114 -114
  529. teradataml/data/jsons/sqle/17.10/TD_PolynomialFeaturesTransform.json +72 -71
  530. teradataml/data/jsons/sqle/17.10/TD_QQNorm.json +111 -111
  531. teradataml/data/jsons/sqle/17.10/TD_RoundColumns.json +93 -93
  532. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeFit.json +127 -127
  533. teradataml/data/jsons/sqle/17.10/TD_RowNormalizeTransform.json +70 -69
  534. teradataml/data/jsons/sqle/17.10/TD_ScaleFit.json +156 -156
  535. teradataml/data/jsons/sqle/17.10/TD_ScaleTransform.json +70 -69
  536. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeFit.json +147 -147
  537. teradataml/data/jsons/sqle/17.10/TD_SimpleImputeTransform.json +48 -47
  538. teradataml/data/jsons/sqle/17.10/TD_StrApply.json +240 -240
  539. teradataml/data/jsons/sqle/17.10/TD_UnivariateStatistics.json +118 -118
  540. teradataml/data/jsons/sqle/17.10/TD_WhichMax.json +52 -52
  541. teradataml/data/jsons/sqle/17.10/TD_WhichMin.json +52 -52
  542. teradataml/data/jsons/sqle/17.10/TD_ZTest.json +171 -171
  543. teradataml/data/jsons/sqle/17.10/Unpack.json +188 -188
  544. teradataml/data/jsons/sqle/17.10/nPath.json +269 -269
  545. teradataml/data/jsons/sqle/17.20/Antiselect.json +56 -56
  546. teradataml/data/jsons/sqle/17.20/Attribution.json +249 -249
  547. teradataml/data/jsons/sqle/17.20/DecisionForestPredict.json +185 -185
  548. teradataml/data/jsons/sqle/17.20/DecisionTreePredict.json +172 -172
  549. teradataml/data/jsons/sqle/17.20/GLMPredict.json +151 -151
  550. teradataml/data/jsons/sqle/17.20/MovingAverage.json +367 -367
  551. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +239 -239
  552. teradataml/data/jsons/sqle/17.20/NaiveBayesPredict.json +149 -149
  553. teradataml/data/jsons/sqle/17.20/NaiveBayesTextClassifierPredict.json +287 -287
  554. teradataml/data/jsons/sqle/17.20/Pack.json +133 -133
  555. teradataml/data/jsons/sqle/17.20/SVMSparsePredict.json +192 -192
  556. teradataml/data/jsons/sqle/17.20/Sessionize.json +105 -105
  557. teradataml/data/jsons/sqle/17.20/StringSimilarity.json +86 -86
  558. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +76 -76
  559. teradataml/data/jsons/sqle/17.20/TD_BinCodeFit.json +239 -239
  560. teradataml/data/jsons/sqle/17.20/TD_BinCodeTransform.json +71 -71
  561. teradataml/data/jsons/sqle/17.20/TD_CategoricalSummary.json +53 -53
  562. teradataml/data/jsons/sqle/17.20/TD_Chisq.json +67 -67
  563. teradataml/data/jsons/sqle/17.20/TD_ClassificationEvaluator.json +145 -145
  564. teradataml/data/jsons/sqle/17.20/TD_ColumnSummary.json +53 -53
  565. teradataml/data/jsons/sqle/17.20/TD_ColumnTransformer.json +218 -218
  566. teradataml/data/jsons/sqle/17.20/TD_ConvertTo.json +92 -92
  567. teradataml/data/jsons/sqle/17.20/TD_DecisionForest.json +259 -259
  568. teradataml/data/jsons/sqle/17.20/TD_DecisionForestPredict.json +139 -139
  569. teradataml/data/jsons/sqle/17.20/TD_FTest.json +186 -186
  570. teradataml/data/jsons/sqle/17.20/TD_FillRowID.json +52 -52
  571. teradataml/data/jsons/sqle/17.20/TD_FunctionFit.json +46 -46
  572. teradataml/data/jsons/sqle/17.20/TD_FunctionTransform.json +72 -72
  573. teradataml/data/jsons/sqle/17.20/TD_GLM.json +431 -431
  574. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +125 -125
  575. teradataml/data/jsons/sqle/17.20/TD_GLMPerSegment.json +411 -411
  576. teradataml/data/jsons/sqle/17.20/TD_GLMPredictPerSegment.json +146 -146
  577. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +91 -91
  578. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithMissingValues.json +76 -76
  579. teradataml/data/jsons/sqle/17.20/TD_GetRowsWithoutMissingValues.json +76 -76
  580. teradataml/data/jsons/sqle/17.20/TD_Histogram.json +152 -152
  581. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +211 -211
  582. teradataml/data/jsons/sqle/17.20/TD_KMeansPredict.json +86 -86
  583. teradataml/data/jsons/sqle/17.20/TD_KNN.json +262 -262
  584. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesTextClassifierTrainer.json +137 -137
  585. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +101 -101
  586. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineTransform.json +71 -71
  587. teradataml/data/jsons/sqle/17.20/TD_NumApply.json +147 -147
  588. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +315 -315
  589. teradataml/data/jsons/sqle/17.20/TD_OneClassSVMPredict.json +123 -123
  590. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingFit.json +271 -271
  591. teradataml/data/jsons/sqle/17.20/TD_OneHotEncodingTransform.json +65 -65
  592. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingFit.json +229 -229
  593. teradataml/data/jsons/sqle/17.20/TD_OrdinalEncodingTransform.json +75 -75
  594. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterFit.json +217 -217
  595. teradataml/data/jsons/sqle/17.20/TD_OutlierFilterTransform.json +48 -48
  596. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesFit.json +114 -114
  597. teradataml/data/jsons/sqle/17.20/TD_PolynomialFeaturesTransform.json +72 -72
  598. teradataml/data/jsons/sqle/17.20/TD_QQNorm.json +111 -111
  599. teradataml/data/jsons/sqle/17.20/TD_ROC.json +177 -177
  600. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionFit.json +178 -178
  601. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionMinComponents.json +73 -73
  602. teradataml/data/jsons/sqle/17.20/TD_RandomProjectionTransform.json +74 -74
  603. teradataml/data/jsons/sqle/17.20/TD_RegressionEvaluator.json +137 -137
  604. teradataml/data/jsons/sqle/17.20/TD_RoundColumns.json +93 -93
  605. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeFit.json +127 -127
  606. teradataml/data/jsons/sqle/17.20/TD_RowNormalizeTransform.json +70 -70
  607. teradataml/data/jsons/sqle/17.20/TD_SVM.json +389 -389
  608. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +124 -124
  609. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +156 -156
  610. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +70 -70
  611. teradataml/data/jsons/sqle/17.20/TD_SentimentExtractor.json +193 -193
  612. teradataml/data/jsons/sqle/17.20/TD_Silhouette.json +142 -142
  613. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeFit.json +147 -147
  614. teradataml/data/jsons/sqle/17.20/TD_SimpleImputeTransform.json +48 -48
  615. teradataml/data/jsons/sqle/17.20/TD_StrApply.json +240 -240
  616. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingFit.json +248 -248
  617. teradataml/data/jsons/sqle/17.20/TD_TargetEncodingTransform.json +75 -75
  618. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +192 -192
  619. teradataml/data/jsons/sqle/17.20/TD_TrainTestSplit.json +142 -142
  620. teradataml/data/jsons/sqle/17.20/TD_UnivariateStatistics.json +117 -117
  621. teradataml/data/jsons/sqle/17.20/TD_VectorDistance.json +182 -182
  622. teradataml/data/jsons/sqle/17.20/TD_WhichMax.json +52 -52
  623. teradataml/data/jsons/sqle/17.20/TD_WhichMin.json +52 -52
  624. teradataml/data/jsons/sqle/17.20/TD_WordEmbeddings.json +241 -241
  625. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +312 -312
  626. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +182 -182
  627. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +170 -170
  628. teradataml/data/jsons/sqle/17.20/Unpack.json +188 -188
  629. teradataml/data/jsons/sqle/17.20/nPath.json +269 -269
  630. teradataml/data/jsons/tableoperator/17.00/read_nos.json +197 -197
  631. teradataml/data/jsons/tableoperator/17.05/read_nos.json +197 -197
  632. teradataml/data/jsons/tableoperator/17.05/write_nos.json +194 -194
  633. teradataml/data/jsons/tableoperator/17.10/read_nos.json +183 -183
  634. teradataml/data/jsons/tableoperator/17.10/write_nos.json +194 -194
  635. teradataml/data/jsons/tableoperator/17.20/read_nos.json +182 -182
  636. teradataml/data/jsons/tableoperator/17.20/write_nos.json +223 -223
  637. teradataml/data/jsons/uaf/17.20/TD_ACF.json +149 -149
  638. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +409 -409
  639. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +79 -79
  640. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +151 -151
  641. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +109 -109
  642. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +107 -107
  643. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +87 -87
  644. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +106 -106
  645. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +80 -80
  646. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +67 -67
  647. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +91 -91
  648. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +136 -136
  649. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +148 -148
  650. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +108 -108
  651. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +109 -109
  652. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +86 -86
  653. teradataml/data/jsons/uaf/17.20/TD_DIFF.json +91 -91
  654. teradataml/data/jsons/uaf/17.20/TD_DTW.json +116 -116
  655. teradataml/data/jsons/uaf/17.20/TD_DURBIN_WATSON.json +100 -100
  656. teradataml/data/jsons/uaf/17.20/TD_EXTRACT_RESULTS.json +38 -38
  657. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +100 -100
  658. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4FORMULA.json +84 -84
  659. teradataml/data/jsons/uaf/17.20/TD_GENSERIES4SINUSOIDS.json +70 -70
  660. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +152 -152
  661. teradataml/data/jsons/uaf/17.20/TD_HOLT_WINTERS_FORECAST.json +313 -313
  662. teradataml/data/jsons/uaf/17.20/TD_IDFFT.json +57 -57
  663. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +94 -94
  664. teradataml/data/jsons/uaf/17.20/TD_INPUTVALIDATOR.json +63 -63
  665. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +181 -181
  666. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +102 -102
  667. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +182 -182
  668. teradataml/data/jsons/uaf/17.20/TD_MATRIXMULTIPLY.json +67 -67
  669. teradataml/data/jsons/uaf/17.20/TD_MINFO.json +66 -66
  670. teradataml/data/jsons/uaf/17.20/TD_MULTIVAR_REGR.json +178 -178
  671. teradataml/data/jsons/uaf/17.20/TD_PACF.json +114 -114
  672. teradataml/data/jsons/uaf/17.20/TD_PORTMAN.json +118 -118
  673. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +175 -175
  674. teradataml/data/jsons/uaf/17.20/TD_POWERTRANSFORM.json +97 -97
  675. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +173 -173
  676. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +136 -136
  677. teradataml/data/jsons/uaf/17.20/TD_SELECTION_CRITERIA.json +89 -89
  678. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_PERIODICITIES.json +79 -79
  679. teradataml/data/jsons/uaf/17.20/TD_SIGNIF_RESIDMEAN.json +67 -67
  680. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +184 -184
  681. teradataml/data/jsons/uaf/17.20/TD_SINFO.json +57 -57
  682. teradataml/data/jsons/uaf/17.20/TD_SMOOTHMA.json +162 -162
  683. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +100 -100
  684. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +111 -111
  685. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +95 -95
  686. teradataml/data/jsons/uaf/17.20/TD_WHITES_GENERAL.json +77 -77
  687. teradataml/data/kmeans_example.json +17 -17
  688. teradataml/data/kmeans_us_arrests_data.csv +0 -0
  689. teradataml/data/knn_example.json +18 -18
  690. teradataml/data/knnrecommender_example.json +6 -6
  691. teradataml/data/knnrecommenderpredict_example.json +12 -12
  692. teradataml/data/lar_example.json +17 -17
  693. teradataml/data/larpredict_example.json +30 -30
  694. teradataml/data/lc_new_predictors.csv +5 -5
  695. teradataml/data/lc_new_reference.csv +9 -9
  696. teradataml/data/lda_example.json +8 -8
  697. teradataml/data/ldainference_example.json +14 -14
  698. teradataml/data/ldatopicsummary_example.json +8 -8
  699. teradataml/data/levendist_input.csv +13 -13
  700. teradataml/data/levenshteindistance_example.json +10 -10
  701. teradataml/data/linreg_example.json +9 -9
  702. teradataml/data/load_example_data.py +326 -323
  703. teradataml/data/loan_prediction.csv +295 -295
  704. teradataml/data/lungcancer.csv +138 -138
  705. teradataml/data/mappingdata.csv +12 -12
  706. teradataml/data/milk_timeseries.csv +157 -157
  707. teradataml/data/min_max_titanic.csv +4 -4
  708. teradataml/data/minhash_example.json +6 -6
  709. teradataml/data/ml_ratings.csv +7547 -7547
  710. teradataml/data/ml_ratings_10.csv +2445 -2445
  711. teradataml/data/model1_table.csv +5 -5
  712. teradataml/data/model2_table.csv +5 -5
  713. teradataml/data/models/iris_db_glm_model.pmml +56 -56
  714. teradataml/data/models/iris_db_xgb_model.pmml +4471 -4471
  715. teradataml/data/modularity_example.json +12 -12
  716. teradataml/data/movavg_example.json +7 -7
  717. teradataml/data/mtx1.csv +7 -7
  718. teradataml/data/mtx2.csv +13 -13
  719. teradataml/data/multi_model_classification.csv +401 -0
  720. teradataml/data/multi_model_regression.csv +401 -0
  721. teradataml/data/mvdfft8.csv +9 -9
  722. teradataml/data/naivebayes_example.json +9 -9
  723. teradataml/data/naivebayespredict_example.json +19 -19
  724. teradataml/data/naivebayestextclassifier2_example.json +6 -6
  725. teradataml/data/naivebayestextclassifier_example.json +8 -8
  726. teradataml/data/naivebayestextclassifierpredict_example.json +20 -20
  727. teradataml/data/name_Find_configure.csv +10 -10
  728. teradataml/data/namedentityfinder_example.json +14 -14
  729. teradataml/data/namedentityfinderevaluator_example.json +10 -10
  730. teradataml/data/namedentityfindertrainer_example.json +6 -6
  731. teradataml/data/nb_iris_input_test.csv +31 -31
  732. teradataml/data/nb_iris_input_train.csv +121 -121
  733. teradataml/data/nbp_iris_model.csv +13 -13
  734. teradataml/data/ner_extractor_text.csv +2 -2
  735. teradataml/data/ner_sports_test2.csv +29 -29
  736. teradataml/data/ner_sports_train.csv +501 -501
  737. teradataml/data/nerevaluator_example.json +5 -5
  738. teradataml/data/nerextractor_example.json +18 -18
  739. teradataml/data/nermem_sports_test.csv +17 -17
  740. teradataml/data/nermem_sports_train.csv +50 -50
  741. teradataml/data/nertrainer_example.json +6 -6
  742. teradataml/data/ngrams_example.json +6 -6
  743. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Aggregate Functions using SQLAlchemy.ipynb +1455 -1455
  744. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Arithmetic Functions Using SQLAlchemy.ipynb +1993 -1993
  745. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Bit-Byte Manipulation Functions using SQLAlchemy.ipynb +1492 -1492
  746. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Built-in functions using SQLAlchemy.ipynb +536 -536
  747. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Regular Expressions Using SQLAlchemy.ipynb +570 -570
  748. teradataml/data/notebooks/sqlalchemy/Teradata Vantage String Functions Using SQLAlchemy.ipynb +2559 -2559
  749. teradataml/data/notebooks/sqlalchemy/Teradata Vantage Window Aggregate Functions using SQLAlchemy.ipynb +2911 -2911
  750. teradataml/data/notebooks/sqlalchemy/Using Generic SQLAlchemy ClauseElements teradataml DataFrame assign method.ipynb +698 -698
  751. teradataml/data/notebooks/sqlalchemy/teradataml filtering using SQLAlchemy ClauseElements.ipynb +784 -784
  752. teradataml/data/npath_example.json +23 -23
  753. teradataml/data/ntree_example.json +14 -14
  754. teradataml/data/numeric_strings.csv +4 -4
  755. teradataml/data/numerics.csv +4 -4
  756. teradataml/data/ocean_buoy.csv +17 -17
  757. teradataml/data/ocean_buoy2.csv +17 -17
  758. teradataml/data/ocean_buoys.csv +27 -27
  759. teradataml/data/ocean_buoys2.csv +10 -10
  760. teradataml/data/ocean_buoys_nonpti.csv +28 -28
  761. teradataml/data/ocean_buoys_seq.csv +29 -29
  762. teradataml/data/openml_example.json +63 -0
  763. teradataml/data/optional_event_table.csv +4 -4
  764. teradataml/data/orders1.csv +11 -11
  765. teradataml/data/orders1_12.csv +12 -12
  766. teradataml/data/orders_ex.csv +4 -4
  767. teradataml/data/pack_example.json +8 -8
  768. teradataml/data/package_tracking.csv +19 -19
  769. teradataml/data/package_tracking_pti.csv +18 -18
  770. teradataml/data/pagerank_example.json +13 -13
  771. teradataml/data/paragraphs_input.csv +6 -6
  772. teradataml/data/pathanalyzer_example.json +7 -7
  773. teradataml/data/pathgenerator_example.json +7 -7
  774. teradataml/data/phrases.csv +7 -7
  775. teradataml/data/pivot_example.json +8 -8
  776. teradataml/data/pivot_input.csv +22 -22
  777. teradataml/data/playerRating.csv +31 -31
  778. teradataml/data/postagger_example.json +6 -6
  779. teradataml/data/posttagger_output.csv +44 -44
  780. teradataml/data/production_data.csv +16 -16
  781. teradataml/data/production_data2.csv +7 -7
  782. teradataml/data/randomsample_example.json +31 -31
  783. teradataml/data/randomwalksample_example.json +8 -8
  784. teradataml/data/rank_table.csv +6 -6
  785. teradataml/data/ref_mobile_data.csv +4 -4
  786. teradataml/data/ref_mobile_data_dense.csv +2 -2
  787. teradataml/data/ref_url.csv +17 -17
  788. teradataml/data/restaurant_reviews.csv +7 -7
  789. teradataml/data/river_data.csv +145 -145
  790. teradataml/data/roc_example.json +7 -7
  791. teradataml/data/roc_input.csv +101 -101
  792. teradataml/data/rule_inputs.csv +6 -6
  793. teradataml/data/rule_table.csv +2 -2
  794. teradataml/data/sales.csv +7 -7
  795. teradataml/data/sales_transaction.csv +501 -501
  796. teradataml/data/salesdata.csv +342 -342
  797. teradataml/data/sample_cities.csv +2 -2
  798. teradataml/data/sample_shapes.csv +10 -10
  799. teradataml/data/sample_streets.csv +2 -2
  800. teradataml/data/sampling_example.json +15 -15
  801. teradataml/data/sax_example.json +8 -8
  802. teradataml/data/scale_example.json +23 -23
  803. teradataml/data/scale_housing.csv +11 -11
  804. teradataml/data/scale_housing_test.csv +6 -6
  805. teradataml/data/scale_stat.csv +11 -11
  806. teradataml/data/scalebypartition_example.json +13 -13
  807. teradataml/data/scalemap_example.json +13 -13
  808. teradataml/data/scalesummary_example.json +12 -12
  809. teradataml/data/score_category.csv +101 -101
  810. teradataml/data/score_summary.csv +4 -4
  811. teradataml/data/script_example.json +9 -9
  812. teradataml/data/scripts/deploy_script.py +65 -0
  813. teradataml/data/scripts/mapper.R +20 -0
  814. teradataml/data/scripts/mapper.py +15 -15
  815. teradataml/data/scripts/mapper_replace.py +15 -15
  816. teradataml/data/scripts/sklearn/__init__.py +0 -0
  817. teradataml/data/scripts/sklearn/sklearn_fit.py +175 -0
  818. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +135 -0
  819. teradataml/data/scripts/sklearn/sklearn_function.template +113 -0
  820. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +158 -0
  821. teradataml/data/scripts/sklearn/sklearn_neighbors.py +152 -0
  822. teradataml/data/scripts/sklearn/sklearn_score.py +128 -0
  823. teradataml/data/scripts/sklearn/sklearn_transform.py +179 -0
  824. teradataml/data/seeds.csv +10 -10
  825. teradataml/data/sentenceextractor_example.json +6 -6
  826. teradataml/data/sentiment_extract_input.csv +11 -11
  827. teradataml/data/sentiment_train.csv +16 -16
  828. teradataml/data/sentiment_word.csv +20 -20
  829. teradataml/data/sentiment_word_input.csv +19 -19
  830. teradataml/data/sentimentextractor_example.json +24 -24
  831. teradataml/data/sentimenttrainer_example.json +8 -8
  832. teradataml/data/sequence_table.csv +10 -10
  833. teradataml/data/seriessplitter_example.json +7 -7
  834. teradataml/data/sessionize_example.json +17 -17
  835. teradataml/data/sessionize_table.csv +116 -116
  836. teradataml/data/setop_test1.csv +24 -24
  837. teradataml/data/setop_test2.csv +22 -22
  838. teradataml/data/soc_nw_edges.csv +10 -10
  839. teradataml/data/soc_nw_vertices.csv +7 -7
  840. teradataml/data/souvenir_timeseries.csv +167 -167
  841. teradataml/data/sparse_iris_attribute.csv +5 -5
  842. teradataml/data/sparse_iris_test.csv +121 -121
  843. teradataml/data/sparse_iris_train.csv +601 -601
  844. teradataml/data/star1.csv +6 -6
  845. teradataml/data/state_transition.csv +5 -5
  846. teradataml/data/stock_data.csv +53 -53
  847. teradataml/data/stock_movement.csv +11 -11
  848. teradataml/data/stock_vol.csv +76 -76
  849. teradataml/data/stop_words.csv +8 -8
  850. teradataml/data/store_sales.csv +37 -37
  851. teradataml/data/stringsimilarity_example.json +7 -7
  852. teradataml/data/strsimilarity_input.csv +13 -13
  853. teradataml/data/students.csv +101 -101
  854. teradataml/data/svm_iris_input_test.csv +121 -121
  855. teradataml/data/svm_iris_input_train.csv +481 -481
  856. teradataml/data/svm_iris_model.csv +7 -7
  857. teradataml/data/svmdense_example.json +9 -9
  858. teradataml/data/svmdensepredict_example.json +18 -18
  859. teradataml/data/svmsparse_example.json +7 -7
  860. teradataml/data/svmsparsepredict_example.json +13 -13
  861. teradataml/data/svmsparsesummary_example.json +7 -7
  862. teradataml/data/target_mobile_data.csv +13 -13
  863. teradataml/data/target_mobile_data_dense.csv +5 -5
  864. teradataml/data/templatedata.csv +1201 -1201
  865. teradataml/data/templates/open_source_ml.json +9 -0
  866. teradataml/data/teradataml_example.json +73 -1
  867. teradataml/data/test_classification.csv +101 -0
  868. teradataml/data/test_loan_prediction.csv +53 -53
  869. teradataml/data/test_pacf_12.csv +37 -37
  870. teradataml/data/test_prediction.csv +101 -0
  871. teradataml/data/test_regression.csv +101 -0
  872. teradataml/data/test_river2.csv +109 -109
  873. teradataml/data/text_inputs.csv +6 -6
  874. teradataml/data/textchunker_example.json +7 -7
  875. teradataml/data/textclassifier_example.json +6 -6
  876. teradataml/data/textclassifier_input.csv +7 -7
  877. teradataml/data/textclassifiertrainer_example.json +6 -6
  878. teradataml/data/textmorph_example.json +5 -5
  879. teradataml/data/textparser_example.json +15 -15
  880. teradataml/data/texttagger_example.json +11 -11
  881. teradataml/data/texttokenizer_example.json +6 -6
  882. teradataml/data/texttrainer_input.csv +11 -11
  883. teradataml/data/tf_example.json +6 -6
  884. teradataml/data/tfidf_example.json +13 -13
  885. teradataml/data/tfidf_input1.csv +201 -201
  886. teradataml/data/tfidf_train.csv +6 -6
  887. teradataml/data/time_table1.csv +535 -535
  888. teradataml/data/time_table2.csv +14 -14
  889. teradataml/data/timeseriesdata.csv +1601 -1601
  890. teradataml/data/timeseriesdatasetsd4.csv +105 -105
  891. teradataml/data/titanic.csv +892 -892
  892. teradataml/data/token_table.csv +696 -696
  893. teradataml/data/train_multiclass.csv +101 -0
  894. teradataml/data/train_regression.csv +101 -0
  895. teradataml/data/train_regression_multiple_labels.csv +101 -0
  896. teradataml/data/train_tracking.csv +27 -27
  897. teradataml/data/transformation_table.csv +5 -5
  898. teradataml/data/transformation_table_new.csv +1 -1
  899. teradataml/data/tv_spots.csv +16 -16
  900. teradataml/data/twod_climate_data.csv +117 -117
  901. teradataml/data/uaf_example.json +475 -475
  902. teradataml/data/univariatestatistics_example.json +8 -8
  903. teradataml/data/unpack_example.json +9 -9
  904. teradataml/data/unpivot_example.json +9 -9
  905. teradataml/data/unpivot_input.csv +8 -8
  906. teradataml/data/us_air_pass.csv +36 -36
  907. teradataml/data/us_population.csv +624 -624
  908. teradataml/data/us_states_shapes.csv +52 -52
  909. teradataml/data/varmax_example.json +17 -17
  910. teradataml/data/vectordistance_example.json +25 -25
  911. teradataml/data/ville_climatedata.csv +121 -121
  912. teradataml/data/ville_tempdata.csv +12 -12
  913. teradataml/data/ville_tempdata1.csv +12 -12
  914. teradataml/data/ville_temperature.csv +11 -11
  915. teradataml/data/waveletTable.csv +1605 -1605
  916. teradataml/data/waveletTable2.csv +1605 -1605
  917. teradataml/data/weightedmovavg_example.json +8 -8
  918. teradataml/data/wft_testing.csv +5 -5
  919. teradataml/data/wine_data.csv +1600 -0
  920. teradataml/data/word_embed_input_table1.csv +5 -5
  921. teradataml/data/word_embed_input_table2.csv +4 -4
  922. teradataml/data/word_embed_model.csv +22 -22
  923. teradataml/data/words_input.csv +13 -13
  924. teradataml/data/xconvolve_complex_left.csv +6 -6
  925. teradataml/data/xconvolve_complex_leftmulti.csv +6 -6
  926. teradataml/data/xgboost_example.json +35 -35
  927. teradataml/data/xgboostpredict_example.json +31 -31
  928. teradataml/dataframe/copy_to.py +1764 -1698
  929. teradataml/dataframe/data_transfer.py +2753 -2745
  930. teradataml/dataframe/dataframe.py +17545 -16946
  931. teradataml/dataframe/dataframe_utils.py +1837 -1740
  932. teradataml/dataframe/fastload.py +611 -603
  933. teradataml/dataframe/indexer.py +424 -424
  934. teradataml/dataframe/setop.py +1179 -1166
  935. teradataml/dataframe/sql.py +10090 -6432
  936. teradataml/dataframe/sql_function_parameters.py +439 -388
  937. teradataml/dataframe/sql_functions.py +652 -652
  938. teradataml/dataframe/sql_interfaces.py +220 -220
  939. teradataml/dataframe/vantage_function_types.py +674 -630
  940. teradataml/dataframe/window.py +693 -692
  941. teradataml/dbutils/__init__.py +3 -3
  942. teradataml/dbutils/dbutils.py +1167 -1150
  943. teradataml/dbutils/filemgr.py +267 -267
  944. teradataml/gen_ai/__init__.py +2 -2
  945. teradataml/gen_ai/convAI.py +472 -472
  946. teradataml/geospatial/__init__.py +3 -3
  947. teradataml/geospatial/geodataframe.py +1105 -1094
  948. teradataml/geospatial/geodataframecolumn.py +392 -387
  949. teradataml/geospatial/geometry_types.py +925 -925
  950. teradataml/hyperparameter_tuner/__init__.py +1 -1
  951. teradataml/hyperparameter_tuner/optimizer.py +3783 -2993
  952. teradataml/hyperparameter_tuner/utils.py +281 -187
  953. teradataml/lib/aed_0_1.dll +0 -0
  954. teradataml/lib/libaed_0_1.dylib +0 -0
  955. teradataml/lib/libaed_0_1.so +0 -0
  956. teradataml/libaed_0_1.dylib +0 -0
  957. teradataml/libaed_0_1.so +0 -0
  958. teradataml/opensource/__init__.py +1 -0
  959. teradataml/opensource/sklearn/__init__.py +1 -0
  960. teradataml/opensource/sklearn/_class.py +255 -0
  961. teradataml/opensource/sklearn/_sklearn_wrapper.py +1668 -0
  962. teradataml/opensource/sklearn/_wrapper_utils.py +268 -0
  963. teradataml/opensource/sklearn/constants.py +54 -0
  964. teradataml/options/__init__.py +121 -124
  965. teradataml/options/configure.py +337 -336
  966. teradataml/options/display.py +176 -176
  967. teradataml/plot/__init__.py +2 -2
  968. teradataml/plot/axis.py +1388 -1388
  969. teradataml/plot/constants.py +15 -15
  970. teradataml/plot/figure.py +398 -398
  971. teradataml/plot/plot.py +760 -760
  972. teradataml/plot/query_generator.py +83 -83
  973. teradataml/plot/subplot.py +216 -216
  974. teradataml/scriptmgmt/UserEnv.py +3788 -3761
  975. teradataml/scriptmgmt/__init__.py +3 -3
  976. teradataml/scriptmgmt/lls_utils.py +1616 -1604
  977. teradataml/series/series.py +532 -532
  978. teradataml/series/series_utils.py +71 -71
  979. teradataml/table_operators/Apply.py +949 -917
  980. teradataml/table_operators/Script.py +1719 -1982
  981. teradataml/table_operators/TableOperator.py +1207 -1616
  982. teradataml/table_operators/__init__.py +2 -3
  983. teradataml/table_operators/apply_query_generator.py +262 -262
  984. teradataml/table_operators/query_generator.py +507 -507
  985. teradataml/table_operators/table_operator_query_generator.py +460 -460
  986. teradataml/table_operators/table_operator_util.py +631 -639
  987. teradataml/table_operators/templates/dataframe_apply.template +184 -184
  988. teradataml/table_operators/templates/dataframe_map.template +176 -176
  989. teradataml/table_operators/templates/script_executor.template +170 -170
  990. teradataml/utils/dtypes.py +684 -684
  991. teradataml/utils/internal_buffer.py +84 -84
  992. teradataml/utils/print_versions.py +205 -205
  993. teradataml/utils/utils.py +410 -410
  994. teradataml/utils/validators.py +2239 -2115
  995. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.0.dist-info}/METADATA +270 -41
  996. teradataml-20.0.0.0.dist-info/RECORD +1038 -0
  997. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.0.dist-info}/WHEEL +1 -1
  998. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.0.dist-info}/zip-safe +1 -1
  999. teradataml/analytics/mle/AdaBoost.py +0 -651
  1000. teradataml/analytics/mle/AdaBoostPredict.py +0 -564
  1001. teradataml/analytics/mle/Antiselect.py +0 -342
  1002. teradataml/analytics/mle/Arima.py +0 -641
  1003. teradataml/analytics/mle/ArimaPredict.py +0 -477
  1004. teradataml/analytics/mle/Attribution.py +0 -1070
  1005. teradataml/analytics/mle/Betweenness.py +0 -658
  1006. teradataml/analytics/mle/Burst.py +0 -711
  1007. teradataml/analytics/mle/CCM.py +0 -600
  1008. teradataml/analytics/mle/CCMPrepare.py +0 -324
  1009. teradataml/analytics/mle/CFilter.py +0 -460
  1010. teradataml/analytics/mle/ChangePointDetection.py +0 -572
  1011. teradataml/analytics/mle/ChangePointDetectionRT.py +0 -477
  1012. teradataml/analytics/mle/Closeness.py +0 -737
  1013. teradataml/analytics/mle/ConfusionMatrix.py +0 -420
  1014. teradataml/analytics/mle/Correlation.py +0 -477
  1015. teradataml/analytics/mle/Correlation2.py +0 -573
  1016. teradataml/analytics/mle/CoxHazardRatio.py +0 -679
  1017. teradataml/analytics/mle/CoxPH.py +0 -556
  1018. teradataml/analytics/mle/CoxSurvival.py +0 -478
  1019. teradataml/analytics/mle/CumulativeMovAvg.py +0 -363
  1020. teradataml/analytics/mle/DTW.py +0 -623
  1021. teradataml/analytics/mle/DWT.py +0 -564
  1022. teradataml/analytics/mle/DWT2D.py +0 -599
  1023. teradataml/analytics/mle/DecisionForest.py +0 -716
  1024. teradataml/analytics/mle/DecisionForestEvaluator.py +0 -363
  1025. teradataml/analytics/mle/DecisionForestPredict.py +0 -561
  1026. teradataml/analytics/mle/DecisionTree.py +0 -830
  1027. teradataml/analytics/mle/DecisionTreePredict.py +0 -528
  1028. teradataml/analytics/mle/ExponentialMovAvg.py +0 -418
  1029. teradataml/analytics/mle/FMeasure.py +0 -402
  1030. teradataml/analytics/mle/FPGrowth.py +0 -734
  1031. teradataml/analytics/mle/FrequentPaths.py +0 -695
  1032. teradataml/analytics/mle/GLM.py +0 -558
  1033. teradataml/analytics/mle/GLML1L2.py +0 -547
  1034. teradataml/analytics/mle/GLML1L2Predict.py +0 -519
  1035. teradataml/analytics/mle/GLMPredict.py +0 -529
  1036. teradataml/analytics/mle/HMMDecoder.py +0 -945
  1037. teradataml/analytics/mle/HMMEvaluator.py +0 -901
  1038. teradataml/analytics/mle/HMMSupervised.py +0 -521
  1039. teradataml/analytics/mle/HMMUnsupervised.py +0 -572
  1040. teradataml/analytics/mle/Histogram.py +0 -561
  1041. teradataml/analytics/mle/IDWT.py +0 -476
  1042. teradataml/analytics/mle/IDWT2D.py +0 -493
  1043. teradataml/analytics/mle/IdentityMatch.py +0 -763
  1044. teradataml/analytics/mle/Interpolator.py +0 -918
  1045. teradataml/analytics/mle/KMeans.py +0 -485
  1046. teradataml/analytics/mle/KNN.py +0 -627
  1047. teradataml/analytics/mle/KNNRecommender.py +0 -488
  1048. teradataml/analytics/mle/KNNRecommenderPredict.py +0 -581
  1049. teradataml/analytics/mle/LAR.py +0 -439
  1050. teradataml/analytics/mle/LARPredict.py +0 -478
  1051. teradataml/analytics/mle/LDA.py +0 -548
  1052. teradataml/analytics/mle/LDAInference.py +0 -492
  1053. teradataml/analytics/mle/LDATopicSummary.py +0 -464
  1054. teradataml/analytics/mle/LevenshteinDistance.py +0 -450
  1055. teradataml/analytics/mle/LinReg.py +0 -433
  1056. teradataml/analytics/mle/LinRegPredict.py +0 -438
  1057. teradataml/analytics/mle/MinHash.py +0 -544
  1058. teradataml/analytics/mle/Modularity.py +0 -587
  1059. teradataml/analytics/mle/NEREvaluator.py +0 -410
  1060. teradataml/analytics/mle/NERExtractor.py +0 -595
  1061. teradataml/analytics/mle/NERTrainer.py +0 -458
  1062. teradataml/analytics/mle/NGrams.py +0 -570
  1063. teradataml/analytics/mle/NPath.py +0 -634
  1064. teradataml/analytics/mle/NTree.py +0 -549
  1065. teradataml/analytics/mle/NaiveBayes.py +0 -462
  1066. teradataml/analytics/mle/NaiveBayesPredict.py +0 -513
  1067. teradataml/analytics/mle/NaiveBayesTextClassifier.py +0 -607
  1068. teradataml/analytics/mle/NaiveBayesTextClassifier2.py +0 -531
  1069. teradataml/analytics/mle/NaiveBayesTextClassifierPredict.py +0 -799
  1070. teradataml/analytics/mle/NamedEntityFinder.py +0 -529
  1071. teradataml/analytics/mle/NamedEntityFinderEvaluator.py +0 -414
  1072. teradataml/analytics/mle/NamedEntityFinderTrainer.py +0 -396
  1073. teradataml/analytics/mle/POSTagger.py +0 -417
  1074. teradataml/analytics/mle/Pack.py +0 -411
  1075. teradataml/analytics/mle/PageRank.py +0 -535
  1076. teradataml/analytics/mle/PathAnalyzer.py +0 -426
  1077. teradataml/analytics/mle/PathGenerator.py +0 -367
  1078. teradataml/analytics/mle/PathStart.py +0 -464
  1079. teradataml/analytics/mle/PathSummarizer.py +0 -470
  1080. teradataml/analytics/mle/Pivot.py +0 -471
  1081. teradataml/analytics/mle/ROC.py +0 -425
  1082. teradataml/analytics/mle/RandomSample.py +0 -637
  1083. teradataml/analytics/mle/RandomWalkSample.py +0 -490
  1084. teradataml/analytics/mle/SAX.py +0 -779
  1085. teradataml/analytics/mle/SVMDense.py +0 -677
  1086. teradataml/analytics/mle/SVMDensePredict.py +0 -536
  1087. teradataml/analytics/mle/SVMDenseSummary.py +0 -437
  1088. teradataml/analytics/mle/SVMSparse.py +0 -557
  1089. teradataml/analytics/mle/SVMSparsePredict.py +0 -553
  1090. teradataml/analytics/mle/SVMSparseSummary.py +0 -435
  1091. teradataml/analytics/mle/Sampling.py +0 -549
  1092. teradataml/analytics/mle/Scale.py +0 -565
  1093. teradataml/analytics/mle/ScaleByPartition.py +0 -496
  1094. teradataml/analytics/mle/ScaleMap.py +0 -378
  1095. teradataml/analytics/mle/ScaleSummary.py +0 -320
  1096. teradataml/analytics/mle/SentenceExtractor.py +0 -363
  1097. teradataml/analytics/mle/SentimentEvaluator.py +0 -432
  1098. teradataml/analytics/mle/SentimentExtractor.py +0 -578
  1099. teradataml/analytics/mle/SentimentTrainer.py +0 -405
  1100. teradataml/analytics/mle/SeriesSplitter.py +0 -641
  1101. teradataml/analytics/mle/Sessionize.py +0 -475
  1102. teradataml/analytics/mle/SimpleMovAvg.py +0 -397
  1103. teradataml/analytics/mle/StringSimilarity.py +0 -425
  1104. teradataml/analytics/mle/TF.py +0 -389
  1105. teradataml/analytics/mle/TFIDF.py +0 -504
  1106. teradataml/analytics/mle/TextChunker.py +0 -414
  1107. teradataml/analytics/mle/TextClassifier.py +0 -399
  1108. teradataml/analytics/mle/TextClassifierEvaluator.py +0 -413
  1109. teradataml/analytics/mle/TextClassifierTrainer.py +0 -565
  1110. teradataml/analytics/mle/TextMorph.py +0 -494
  1111. teradataml/analytics/mle/TextParser.py +0 -623
  1112. teradataml/analytics/mle/TextTagger.py +0 -530
  1113. teradataml/analytics/mle/TextTokenizer.py +0 -502
  1114. teradataml/analytics/mle/UnivariateStatistics.py +0 -488
  1115. teradataml/analytics/mle/Unpack.py +0 -526
  1116. teradataml/analytics/mle/Unpivot.py +0 -438
  1117. teradataml/analytics/mle/VarMax.py +0 -776
  1118. teradataml/analytics/mle/VectorDistance.py +0 -762
  1119. teradataml/analytics/mle/WeightedMovAvg.py +0 -400
  1120. teradataml/analytics/mle/XGBoost.py +0 -842
  1121. teradataml/analytics/mle/XGBoostPredict.py +0 -627
  1122. teradataml/analytics/mle/__init__.py +0 -123
  1123. teradataml/analytics/mle/json/adaboost_mle.json +0 -135
  1124. teradataml/analytics/mle/json/adaboostpredict_mle.json +0 -85
  1125. teradataml/analytics/mle/json/antiselect_mle.json +0 -34
  1126. teradataml/analytics/mle/json/antiselect_mle_mle.json +0 -34
  1127. teradataml/analytics/mle/json/arima_mle.json +0 -172
  1128. teradataml/analytics/mle/json/arimapredict_mle.json +0 -52
  1129. teradataml/analytics/mle/json/attribution_mle_mle.json +0 -143
  1130. teradataml/analytics/mle/json/betweenness_mle.json +0 -97
  1131. teradataml/analytics/mle/json/burst_mle.json +0 -140
  1132. teradataml/analytics/mle/json/ccm_mle.json +0 -124
  1133. teradataml/analytics/mle/json/ccmprepare_mle.json +0 -14
  1134. teradataml/analytics/mle/json/cfilter_mle.json +0 -93
  1135. teradataml/analytics/mle/json/changepointdetection_mle.json +0 -92
  1136. teradataml/analytics/mle/json/changepointdetectionrt_mle.json +0 -78
  1137. teradataml/analytics/mle/json/closeness_mle.json +0 -104
  1138. teradataml/analytics/mle/json/confusionmatrix_mle.json +0 -79
  1139. teradataml/analytics/mle/json/correlation_mle.json +0 -86
  1140. teradataml/analytics/mle/json/correlationreduce_mle.json +0 -49
  1141. teradataml/analytics/mle/json/coxhazardratio_mle.json +0 -89
  1142. teradataml/analytics/mle/json/coxph_mle.json +0 -98
  1143. teradataml/analytics/mle/json/coxsurvival_mle.json +0 -79
  1144. teradataml/analytics/mle/json/cumulativemovavg_mle.json +0 -34
  1145. teradataml/analytics/mle/json/decisionforest_mle.json +0 -167
  1146. teradataml/analytics/mle/json/decisionforestevaluator_mle.json +0 -33
  1147. teradataml/analytics/mle/json/decisionforestpredict_mle_mle.json +0 -74
  1148. teradataml/analytics/mle/json/decisiontree_mle.json +0 -194
  1149. teradataml/analytics/mle/json/decisiontreepredict_mle_mle.json +0 -86
  1150. teradataml/analytics/mle/json/dtw_mle.json +0 -97
  1151. teradataml/analytics/mle/json/dwt2d_mle.json +0 -116
  1152. teradataml/analytics/mle/json/dwt_mle.json +0 -101
  1153. teradataml/analytics/mle/json/exponentialmovavg_mle.json +0 -55
  1154. teradataml/analytics/mle/json/fmeasure_mle.json +0 -58
  1155. teradataml/analytics/mle/json/fpgrowth_mle.json +0 -159
  1156. teradataml/analytics/mle/json/frequentpaths_mle.json +0 -129
  1157. teradataml/analytics/mle/json/glm_mle.json +0 -111
  1158. teradataml/analytics/mle/json/glml1l2_mle.json +0 -106
  1159. teradataml/analytics/mle/json/glml1l2predict_mle.json +0 -57
  1160. teradataml/analytics/mle/json/glmpredict_mle_mle.json +0 -74
  1161. teradataml/analytics/mle/json/histogram_mle.json +0 -100
  1162. teradataml/analytics/mle/json/hmmdecoder_mle.json +0 -192
  1163. teradataml/analytics/mle/json/hmmevaluator_mle.json +0 -206
  1164. teradataml/analytics/mle/json/hmmsupervised_mle.json +0 -91
  1165. teradataml/analytics/mle/json/hmmunsupervised_mle.json +0 -114
  1166. teradataml/analytics/mle/json/identitymatch_mle.json +0 -88
  1167. teradataml/analytics/mle/json/idwt2d_mle.json +0 -73
  1168. teradataml/analytics/mle/json/idwt_mle.json +0 -66
  1169. teradataml/analytics/mle/json/interpolator_mle.json +0 -151
  1170. teradataml/analytics/mle/json/kmeans_mle.json +0 -97
  1171. teradataml/analytics/mle/json/knn_mle.json +0 -141
  1172. teradataml/analytics/mle/json/knnrecommender_mle.json +0 -111
  1173. teradataml/analytics/mle/json/knnrecommenderpredict_mle.json +0 -75
  1174. teradataml/analytics/mle/json/lar_mle.json +0 -78
  1175. teradataml/analytics/mle/json/larpredict_mle.json +0 -69
  1176. teradataml/analytics/mle/json/lda_mle.json +0 -130
  1177. teradataml/analytics/mle/json/ldainference_mle.json +0 -78
  1178. teradataml/analytics/mle/json/ldatopicsummary_mle.json +0 -64
  1179. teradataml/analytics/mle/json/levenshteindistance_mle.json +0 -92
  1180. teradataml/analytics/mle/json/linreg_mle.json +0 -42
  1181. teradataml/analytics/mle/json/linregpredict_mle.json +0 -56
  1182. teradataml/analytics/mle/json/minhash_mle.json +0 -113
  1183. teradataml/analytics/mle/json/modularity_mle.json +0 -91
  1184. teradataml/analytics/mle/json/naivebayespredict_mle_mle.json +0 -85
  1185. teradataml/analytics/mle/json/naivebayesreduce_mle.json +0 -52
  1186. teradataml/analytics/mle/json/naivebayestextclassifierpredict_mle_mle.json +0 -147
  1187. teradataml/analytics/mle/json/naivebayestextclassifiertrainer2_mle.json +0 -108
  1188. teradataml/analytics/mle/json/naivebayestextclassifiertrainer_mle.json +0 -102
  1189. teradataml/analytics/mle/json/namedentityfinder_mle.json +0 -84
  1190. teradataml/analytics/mle/json/namedentityfinderevaluatorreduce_mle.json +0 -43
  1191. teradataml/analytics/mle/json/namedentityfindertrainer_mle.json +0 -64
  1192. teradataml/analytics/mle/json/nerevaluator_mle.json +0 -54
  1193. teradataml/analytics/mle/json/nerextractor_mle.json +0 -87
  1194. teradataml/analytics/mle/json/nertrainer_mle.json +0 -89
  1195. teradataml/analytics/mle/json/ngrams_mle.json +0 -137
  1196. teradataml/analytics/mle/json/ngramsplitter_mle_mle.json +0 -137
  1197. teradataml/analytics/mle/json/npath@coprocessor_mle.json +0 -73
  1198. teradataml/analytics/mle/json/ntree@coprocessor_mle.json +0 -123
  1199. teradataml/analytics/mle/json/pack_mle.json +0 -58
  1200. teradataml/analytics/mle/json/pack_mle_mle.json +0 -58
  1201. teradataml/analytics/mle/json/pagerank_mle.json +0 -81
  1202. teradataml/analytics/mle/json/pathanalyzer_mle.json +0 -63
  1203. teradataml/analytics/mle/json/pathgenerator_mle.json +0 -40
  1204. teradataml/analytics/mle/json/pathstart_mle.json +0 -62
  1205. teradataml/analytics/mle/json/pathsummarizer_mle.json +0 -72
  1206. teradataml/analytics/mle/json/pivoting_mle.json +0 -71
  1207. teradataml/analytics/mle/json/postagger_mle.json +0 -51
  1208. teradataml/analytics/mle/json/randomsample_mle.json +0 -131
  1209. teradataml/analytics/mle/json/randomwalksample_mle.json +0 -85
  1210. teradataml/analytics/mle/json/roc_mle.json +0 -73
  1211. teradataml/analytics/mle/json/sampling_mle.json +0 -75
  1212. teradataml/analytics/mle/json/sax_mle.json +0 -154
  1213. teradataml/analytics/mle/json/scale_mle.json +0 -93
  1214. teradataml/analytics/mle/json/scalebypartition_mle.json +0 -89
  1215. teradataml/analytics/mle/json/scalemap_mle.json +0 -44
  1216. teradataml/analytics/mle/json/scalesummary_mle.json +0 -14
  1217. teradataml/analytics/mle/json/sentenceextractor_mle.json +0 -41
  1218. teradataml/analytics/mle/json/sentimentevaluator_mle.json +0 -43
  1219. teradataml/analytics/mle/json/sentimentextractor_mle.json +0 -100
  1220. teradataml/analytics/mle/json/sentimenttrainer_mle.json +0 -68
  1221. teradataml/analytics/mle/json/seriessplitter_mle.json +0 -133
  1222. teradataml/analytics/mle/json/sessionize_mle_mle.json +0 -62
  1223. teradataml/analytics/mle/json/simplemovavg_mle.json +0 -48
  1224. teradataml/analytics/mle/json/stringsimilarity_mle.json +0 -50
  1225. teradataml/analytics/mle/json/stringsimilarity_mle_mle.json +0 -50
  1226. teradataml/analytics/mle/json/svmdense_mle.json +0 -165
  1227. teradataml/analytics/mle/json/svmdensepredict_mle.json +0 -95
  1228. teradataml/analytics/mle/json/svmdensesummary_mle.json +0 -58
  1229. teradataml/analytics/mle/json/svmsparse_mle.json +0 -148
  1230. teradataml/analytics/mle/json/svmsparsepredict_mle_mle.json +0 -103
  1231. teradataml/analytics/mle/json/svmsparsesummary_mle.json +0 -57
  1232. teradataml/analytics/mle/json/textchunker_mle.json +0 -40
  1233. teradataml/analytics/mle/json/textclassifier_mle.json +0 -51
  1234. teradataml/analytics/mle/json/textclassifierevaluator_mle.json +0 -43
  1235. teradataml/analytics/mle/json/textclassifiertrainer_mle.json +0 -103
  1236. teradataml/analytics/mle/json/textmorph_mle.json +0 -63
  1237. teradataml/analytics/mle/json/textparser_mle.json +0 -166
  1238. teradataml/analytics/mle/json/texttagger_mle.json +0 -81
  1239. teradataml/analytics/mle/json/texttokenizer_mle.json +0 -91
  1240. teradataml/analytics/mle/json/tf_mle.json +0 -33
  1241. teradataml/analytics/mle/json/tfidf_mle.json +0 -34
  1242. teradataml/analytics/mle/json/univariatestatistics_mle.json +0 -81
  1243. teradataml/analytics/mle/json/unpack_mle.json +0 -91
  1244. teradataml/analytics/mle/json/unpack_mle_mle.json +0 -91
  1245. teradataml/analytics/mle/json/unpivoting_mle.json +0 -63
  1246. teradataml/analytics/mle/json/varmax_mle.json +0 -176
  1247. teradataml/analytics/mle/json/vectordistance_mle.json +0 -179
  1248. teradataml/analytics/mle/json/weightedmovavg_mle.json +0 -48
  1249. teradataml/analytics/mle/json/xgboost_mle.json +0 -178
  1250. teradataml/analytics/mle/json/xgboostpredict_mle.json +0 -104
  1251. teradataml/analytics/sqle/Antiselect.py +0 -321
  1252. teradataml/analytics/sqle/Attribution.py +0 -603
  1253. teradataml/analytics/sqle/DecisionForestPredict.py +0 -408
  1254. teradataml/analytics/sqle/GLMPredict.py +0 -430
  1255. teradataml/analytics/sqle/MovingAverage.py +0 -543
  1256. teradataml/analytics/sqle/NGramSplitter.py +0 -548
  1257. teradataml/analytics/sqle/NPath.py +0 -632
  1258. teradataml/analytics/sqle/NaiveBayesTextClassifierPredict.py +0 -515
  1259. teradataml/analytics/sqle/Pack.py +0 -388
  1260. teradataml/analytics/sqle/SVMSparsePredict.py +0 -464
  1261. teradataml/analytics/sqle/Sessionize.py +0 -390
  1262. teradataml/analytics/sqle/StringSimilarity.py +0 -400
  1263. teradataml/analytics/sqle/Unpack.py +0 -503
  1264. teradataml/analytics/sqle/json/antiselect_sqle.json +0 -21
  1265. teradataml/analytics/sqle/json/attribution_sqle.json +0 -92
  1266. teradataml/analytics/sqle/json/decisionforestpredict_sqle.json +0 -48
  1267. teradataml/analytics/sqle/json/glmpredict_sqle.json +0 -48
  1268. teradataml/analytics/sqle/json/h2opredict_sqle.json +0 -63
  1269. teradataml/analytics/sqle/json/movingaverage_sqle.json +0 -58
  1270. teradataml/analytics/sqle/json/naivebayestextclassifierpredict_sqle.json +0 -76
  1271. teradataml/analytics/sqle/json/ngramsplitter_sqle.json +0 -126
  1272. teradataml/analytics/sqle/json/npath_sqle.json +0 -67
  1273. teradataml/analytics/sqle/json/pack_sqle.json +0 -47
  1274. teradataml/analytics/sqle/json/pmmlpredict_sqle.json +0 -55
  1275. teradataml/analytics/sqle/json/sessionize_sqle.json +0 -43
  1276. teradataml/analytics/sqle/json/stringsimilarity_sqle.json +0 -39
  1277. teradataml/analytics/sqle/json/svmsparsepredict_sqle.json +0 -74
  1278. teradataml/analytics/sqle/json/unpack_sqle.json +0 -80
  1279. teradataml/catalog/model_cataloging.py +0 -980
  1280. teradataml/config/mlengine_alias_definitions_v1.0 +0 -118
  1281. teradataml/config/mlengine_alias_definitions_v1.1 +0 -127
  1282. teradataml/config/mlengine_alias_definitions_v1.3 +0 -129
  1283. teradataml/table_operators/sandbox_container_util.py +0 -643
  1284. teradataml-17.20.0.7.dist-info/RECORD +0 -1280
  1285. {teradataml-17.20.0.7.dist-info → teradataml-20.0.0.0.dist-info}/top_level.txt +0 -0
@@ -0,0 +1,1668 @@
+ # ##################################################################
+ #
+ # Copyright 2023 Teradata. All rights reserved.
+ # TERADATA CONFIDENTIAL AND TRADE SECRET
+ #
+ # Primary Owner: Adithya Avvaru (adithya.avvaru@teradata.com)
+ # Secondary Owner: Pankaj Purandare (pankajvinod.purandare@teradata.com)
+ #
+ # Version: 1.0
+ # Function Version: 1.0
+ #
+ # This file contains the object wrapper class for opensource packages and child object
+ # wrapper classes for each opensource package. Currently, we have a child object
+ # wrapper class for scikit-learn.
+ #
+ # ##################################################################
+ 
+ from collections import OrderedDict, defaultdict
+ from importlib import import_module
+ 
+ import base64
+ import functools
+ import json
+ import numpy
+ import os
+ import pickle
+ import time
+ import inspect
+ import warnings
+ import random
+ import pandas as pd
+ from teradatasqlalchemy import BLOB, CLOB, FLOAT, TIMESTAMP, VARCHAR, INTEGER
+ import pandas.api.types as pt
+ 
+ from teradataml import _TDML_DIRECTORY, Script, TeradataMlException, Apply
+ from teradataml.dataframe.copy_to import _get_sqlalchemy_mapping
+ from teradataml.common import pylogger
+ from teradataml.common.utils import UtilFuncs
+ from teradataml.context.context import _get_current_databasename, get_connection
+ from teradataml.dbutils.filemgr import install_file, remove_file
+ from teradataml.utils.utils import execute_sql
+ from teradataml.options.configure import configure
+ from teradataml.opensource.sklearn._wrapper_utils import _validate_fit_run, _generate_new_name, \
+     _validate_opensource_func_args, _derive_df_and_required_columns, _validate_df_query_type
+ from teradataml.opensource.sklearn.constants import OpenSourcePackage, _OSML_MODELS_PRIMARY_INDEX, \
+     _OSML_MODELS_TABLE_NAME, _OSML_MODELS_TABLE_COLUMNS_TYPE_DICT, OpensourceModels, \
+     _OSML_ADDITIONAL_COLUMN_TYPES
+ from teradataml.common.messagecodes import MessageCodes
+ from teradataml.common.messages import Messages
+ from teradataml.catalog.byom import save_byom, retrieve_byom, delete_byom
+ from teradataml.dbutils.dbutils import _create_table
+ from teradataml.utils.validators import _Validators
+ from teradataml.dataframe.dataframe import DataFrame
+ from teradataml.dataframe.dataframe_utils import DataFrameUtils
+ from teradataml.scriptmgmt.lls_utils import create_env, get_env
+ from teradataml.common.garbagecollector import GarbageCollector
+ from teradataml.common.constants import TeradataConstants
+ 
+ 
+ logger = pylogger.getLogger()
+ 
+ validator = _Validators()
+ 
+ installed_model_files = defaultdict(int)
+ 
+ class _GenericObjectWrapper:
+     def __init__(self) -> None:
+         self._db_name = _get_current_databasename()
+ 
+         self._scripts_path = os.path.join(_TDML_DIRECTORY, "data", "scripts", "sklearn")
+ 
+         # Some random number to be used as partition value if partition_columns is None for fit().
+         self._default_data_partition_value = -1001
+ 
+         self.modelObj = None
+         self._model_data = None
+ 
+         self._tdml_tmp_dir = os.path.join(os.path.expanduser("~"), ".teradataml")
+ 
+         self._env = None
+ 
+         self._is_lake_system = UtilFuncs._is_lake()
+ 
+         if self._is_lake_system:
+             if configure.openml_user_env is not None:
+                 self._env = configure.openml_user_env
+             else:
+                 self._create_or_get_env()
+         else:
+             execute_sql(f"SET SESSION SEARCHUIFDBPATH = {self._db_name};")
+ 
+     def _create_or_get_env(self):
+         """
+         Internal function to return the env if it already exists, else
+         create the environment using the template file and return it.
+         """
+         # Get the template file path.
+         template_dir_path = os.path.join(_TDML_DIRECTORY, "data", "templates",
+                                          "open_source_ml.json")
+ 
+         # Read the template file.
+         with open(template_dir_path, "r") as r_file:
+             data = json.load(r_file)
+ 
+         # Get env_name.
+         _env_name = data["env_specs"][0]["env_name"]
+ 
+         try:
+             # Call get_env() to fetch the environment (e.g., 'openml_env').
+             self._env = get_env(_env_name)
+         except TeradataMlException as tdml_e:
+             # We get here when the error says the env does not exist;
+             # otherwise, the exception is re-raised as is.
+             exc_msg = "Failed to execute get_env(). User environment '{}' not " \
+                       "found.".format(_env_name)
+             if exc_msg in tdml_e.args[0]:
+                 print(f"No OpenAF environment with name '{_env_name}' found. Creating one with "
+                       "latest supported python and required packages.")
+                 # Env does not exist, so create one.
+                 self._env = create_env(template=template_dir_path)
+             else:
+                 raise tdml_e
+         except Exception as exc:
+             raise exc
+ 
+     def _get_columns_as_list(self, cols):
+         """
+         Internal function to get columns as list of strings.
+         Empty list is returned if cols is None.
+         """
+         if cols is None:
+             return []
+         if not isinstance(cols, (list, tuple)):
+             return [cols]
+         return cols
+ 
+     def _get_data_and_data_partition_columns(self, data, feature_columns, label_columns,
+                                              partition_columns=None, group_columns=None):
+         """
+         Internal function to generate one new partition column (if not provided) and return
+         the data and partition columns (either the generated or the passed ones).
+         """
+         # Avoid a mutable default argument for group_columns.
+         group_columns = group_columns if group_columns is not None else []
+ 
+         new_partition_columns = self._get_columns_as_list(partition_columns)
+ 
+         if not partition_columns:
+             # If partition column is not specified, create a partition column and run Script.
+             # This runs the Script on one AMP as we are partitioning data using this column,
+             # which contains only one value.
+             new_partition_columns = [_generate_new_name(type="column")]
+             data = data.assign(**{new_partition_columns[0]: self._default_data_partition_value})
+ 
+         # Filter out partition columns from feature columns and label columns.
+         new_partition_columns_filtered = [col for col in new_partition_columns
+                                           if col not in (feature_columns + label_columns + group_columns)]
+ 
+         all_columns = feature_columns + label_columns + group_columns + new_partition_columns_filtered
+         return data.select(all_columns), new_partition_columns
+ 
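+     # For example (illustrative): with feature_columns=["f1", "f2"], label_columns=["y"]
+     # and partition_columns=None, the returned DataFrame carries
+     # ["f1", "f2", "y", <generated_column>], where <generated_column> holds the constant
+     # -1001 so that the script executes on a single AMP.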
+     def _run_script(self, data, command, partition_columns, return_types):
+         """
+         Internal function to run Script(), given the arguments needed by STO's or
+         Apply's Script.
+         """
+         if isinstance(partition_columns, list) and len(partition_columns) == 0:
+             partition_columns = None
+ 
+         if self._is_lake_system:
+             obj = Apply(data=data,
+                         returns=OrderedDict(return_types),
+                         apply_command=command,
+                         data_partition_column=partition_columns,
+                         env_name=self._env,
+                         delimiter="\t")
+         else:
+             obj = Script(data=data,
+                          returns=OrderedDict(return_types),
+                          script_command=command,
+                          data_partition_column=partition_columns)
+             obj.check_reserved_keyword = False
+ 
+         obj.skip_argument_validation = True
+         return obj.execute_script(output_style="TABLE")
+ 
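+     # For reference, return_types is a list of (column_name, teradatasqlalchemy type)
+     # pairs, e.g. [("partition_col", INTEGER()), ("model", CLOB())] (illustrative),
+     # passed as the "returns" argument of Apply/Script above.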
+     def _install_script_file(self,
+                              file_identifier=None,
+                              file_name=None,
+                              is_binary=False,
+                              file_location=None):
+         """
+         Internal function to install a script file in Vantage.
+         """
+         if file_location is None:
+             file_location = self._scripts_path
+         new_script = os.path.join(file_location, file_name)
+ 
+         # self._env is set during object creation.
+         # If not set, it is Vantage Enterprise; otherwise, it is Vantage Lake.
+         if not self._is_lake_system:
+             status = install_file(file_identifier=file_identifier,
+                                   file_path=new_script,
+                                   replace=True,
+                                   suppress_output=True,
+                                   is_binary=is_binary)
+         else:
+             status = self._env.install_file(file_path=new_script,
+                                             replace=True,
+                                             suppress_output=True)
+         if not status:
+             raise TeradataMlException(
+                 f"Script file '{file_name}' failed to get installed/replaced in Vantage."
+             )
+ 
+     def _get_partition_col_indices_and_types(self, data, partition_columns):
+         """
+         partition_columns can come from feature columns and label columns.
+         So, get the indices and types of these columns from the data columns.
+         """
+         partition_indices = []
+         partition_types = []
+         new_partition_columns = []
+         for i, col in enumerate(data.columns):
+             if col in partition_columns:
+                 new_partition_columns.append(col)
+                 partition_indices.append(i)
+                 partition_types.append(
+                     data._td_column_names_and_sqlalchemy_types[col.lower()].python_type.__name__)
+         # Convert to the string "None" if the columns are not present, as an empty string
+         # can't be passed to the Script script_command's command-line arguments.
+         # Otherwise, pass the values as comma-separated strings.
+         partition_indices = ",".join(str(x) for x in partition_indices) \
+             if partition_indices else "None"
+         partition_types = ",".join(partition_types) if partition_types else "None"
+         return partition_indices, partition_types, new_partition_columns
+ 
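+     # For example (illustrative): if data.columns == ["f1", "part", "y"],
+     # partition_columns == ["part"] and "part" maps to a Python int, this returns
+     # ("1", "int", ["part"]).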
+     def _get_kwargs_str(self, kwargs):
+         """
+         Returns string of kwargs in the format:
+             key1 val1-type1 key2 val2-type2 ...
+         """
+         return " ".join(f"{key} {str(val)}-{type(val).__name__}"
+                         for key, val in kwargs.items())
+ 
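+     # For example, {"max_iter": 100, "fit_intercept": True} yields
+     # "max_iter 100-int fit_intercept True-bool".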
+     def extract_sklearn_obj(self, n_unique_partitions=1, n_partition_cols=1):
+         """
+         Internal function to extract the sklearn object from the model(s) depending on the
+         number of partitions. When there is only one model, it is directly used as the
+         sklearn object (modelObj). When there are multiple models, they are converted to a
+         pandas DataFrame and stored in the sklearn object.
+         """
+         vals = execute_sql("select * from {}".format(self._model_data._table_name)).fetchall()
+ 
+         # pickle will issue a caution warning if model pickling was done with a
+         # different library version than the one used here. The following disables any
+         # warnings that might otherwise show in the scriptlog files on the Advanced SQL
+         # Engine nodes in this case. Yet, do keep an eye out for incompatible pickle versions.
+         warnings.filterwarnings("ignore")
+ 
+         model_obj = None
+         # Extract and unpickle the last column, which is the model object.
+         for i, row in enumerate(vals):
+             if self._is_lake_system:
+                 model_obj = pickle.loads(row[n_partition_cols])
+             else:
+                 model_obj = pickle.loads(base64.b64decode(row[n_partition_cols].partition("'")[2]))
+             row[n_partition_cols] = model_obj
+             vals[i] = row
+         if n_unique_partitions == 1:
+             self.modelObj = model_obj
+         elif n_unique_partitions > 1:
+             self.modelObj = pd.DataFrame(vals, columns=self._model_data.columns)
+         else:
+             raise ValueError("Number of partitions should be greater than 0.")
+ 
+         warnings.filterwarnings("default")
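+     # Illustrative multi-model shape: with two fit partitions, modelObj above becomes a
+     # pandas DataFrame such as
+     #     partition_col               model
+     #     1                LinearRegression()
+     #     2                LinearRegression()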
+ 
+ 
+ class _OpenSourceObjectWrapper(_GenericObjectWrapper):
+     # This has to be set for every package which subclasses this class.
+     OPENSOURCE_PACKAGE_NAME = None
+ 
+     def __init__(self, model=None, module_name=None, class_name=None, pos_args=None, kwargs=None):
+         if not model and not module_name and not class_name:
+             raise TeradataMlException(
+                 Messages.get_message(MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT, "model",
+                                      "module_name and class_name"),
+                 MessageCodes.EITHER_THIS_OR_THAT_ARGUMENT)
+ 
+         validator._validate_mutually_inclusive_arguments(module_name, "module_name",
+                                                          class_name, "class_name")
+ 
+         super().__init__()
+ 
+         self.module_name = module_name
+         self.class_name = class_name
+         self.kwargs = kwargs if kwargs is not None else {}
+         self.pos_args = pos_args if pos_args is not None else tuple()
+ 
+         self._fit_label_columns_types = None
+         self._table_name_prefix = None
+ 
+         self._is_default_partition_value_fit = True  # False when the user provides partition columns.
+         self._fit_partition_colums_non_default = None
+         self._is_default_partition_value_predict = True  # False when the user provides partition columns.
+ 
+     def _validate_equality_of_partition_values(self, fit_values, trans_values):
+         """
+         Internal function to check that the partition values in fit() and predict()
+         are the same.
+         """
+         if len(fit_values) != len(trans_values):
+             return False
+ 
+         for val in fit_values:
+             if val not in trans_values:
+                 return False
+ 
+         return True
+ 
+     def _validate_unique_partition_values(self, data, partition_columns):
+         """
+         Internal function to validate that the partition values in "partition_columns" used
+         in fit() and predict() are the same.
+         """
+         data._index_label = None
+         unique_values = data.drop_duplicate(partition_columns).get_values()
+ 
+         trans_unique_values = sorted(unique_values.tolist(), key=lambda x: tuple(x))
+         fit_unique_values = sorted(self._fit_partition_unique_values.tolist()
+                                    if not isinstance(self._fit_partition_unique_values, list)
+                                    else self._fit_partition_unique_values, key=lambda x: tuple(x))
+         default_unique_values = [[self._default_data_partition_value]]
+ 
+         if fit_unique_values == default_unique_values and \
+                 trans_unique_values != default_unique_values:
+             error_msg = Messages.get_message(MessageCodes.PARTITION_IN_BOTH_FIT_AND_PREDICT,
+                                              "without", "with")
+             msg_code = MessageCodes.PARTITION_IN_BOTH_FIT_AND_PREDICT
+             raise TeradataMlException(error_msg, msg_code)
+ 
+         if not self._validate_equality_of_partition_values(fit_unique_values, trans_unique_values):
+             raise TeradataMlException(
+                 Messages.get_message(MessageCodes.PARTITION_VALUES_NOT_MATCHING),
+                 MessageCodes.PARTITION_VALUES_NOT_MATCHING
+             )
+ 
+     def fit(self, **kwargs):
+         pass
+ 
+     def __get_obj_attributes_multi_model(self, name):
+         """
+         Internal function to get attributes of all sklearn model objects when multiple
+         models are generated by fit.
+         """
+         # Wrapper function to invoke the dynamic method, using the arguments
+         # passed by the user, on the model in each row.
+         def __sklearn_method_invoker_for_multimodel(*c, **kwargs):
+             multi_models = self.modelObj.copy()
+             for i in range(multi_models.shape[0]):
+                 curr_model = multi_models.iloc[i]["model"]
+                 multi_models.at[i, "model"] = getattr(curr_model, name)(*c, **kwargs)
+             return multi_models.rename(columns={"model": name})
+ 
+         # Identify if the attribute is callable or not, to avoid
+         # this check in the loop for every model.
+         # Assuming that self.modelObj will have at least 1 row.
+         is_attr_callable = callable(getattr(self.modelObj.iloc[0]["model"], name))
+ 
+         # If the attribute is callable, it should be applied on the model in each row
+         # using the passed arguments.
+         if is_attr_callable:
+             return __sklearn_method_invoker_for_multimodel
+ 
+         output_attributes = self.modelObj.copy()
+         for i in range(output_attributes.shape[0]):
+             model = output_attributes.iloc[i]["model"]
+             output_attributes.at[i, "model"] = getattr(model, name)
+         return output_attributes.rename(columns={"model": name})
+ 
+     def __getattr__(self, name):
+         # This just runs attributes (functions and properties) from the sklearn object.
+         def __sklearn_method_invoker(*c, **kwargs):
+             return attribute_instance(*c, **kwargs)
+ 
+         if isinstance(self.modelObj, pd.DataFrame):
+             return self.__get_obj_attributes_multi_model(name)
+ 
+         attribute_instance = getattr(self.modelObj, name)
+         if callable(attribute_instance):
+             return __sklearn_method_invoker
+         return attribute_instance
+ 
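+     # For example (illustrative), accessing model.coef_ or calling model.predict(...) on a
+     # multi-model wrapper returns a pandas DataFrame with one row per partition and the
+     # "model" column renamed to the requested attribute name.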
+     @classmethod
+     def _validate_model_supportability(cls, model):
+         """
+         Internal function to validate if the model provided for deployment is supported by
+         teradataml's opensourceML.
+         """
+         error_msg = Messages.get_message(MessageCodes.MODEL_CATALOGING_OPERATION_FAILED, "validate",
+                                          "The given model is not a supported opensource model.")
+         msg_code = MessageCodes.MODEL_CATALOGING_OPERATION_FAILED
+         try:
+             # For scikit-learn, model.__module__ is similar to 'sklearn.linear_model._base'.
+             # TODO: check for other supported packages.
+             if model.__module__.split(".")[0] not in OpenSourcePackage.values():
+                 raise TeradataMlException(error_msg, msg_code)
+         except Exception as ex:
+             # In case model.__module__ fails.
+             raise TeradataMlException(error_msg, msg_code) from ex
+ 
+     def _save_model(self, model_name, replace_if_exists=False):
+         """
+         Internal function to save the model stored in the file at the location given by
+         "model_file_path_local" to Vantage, using the BYOM methods save_byom() and
+         delete_byom(), based on the value of the "replace_if_exists" argument.
+         """
+         # Create a table, if it doesn't exist, in Vantage to store the model info.
+         conn = get_connection()
+         osml_models_table_exists = conn.dialect.has_table(conn,
+                                                           table_name=_OSML_MODELS_TABLE_NAME,
+                                                           schema=self._db_name)
+         if not osml_models_table_exists:
+             all_columns = _OSML_MODELS_TABLE_COLUMNS_TYPE_DICT.copy()
+             all_columns.update(_OSML_ADDITIONAL_COLUMN_TYPES)
+             _create_table(table_name=_OSML_MODELS_TABLE_NAME, columns=all_columns,
+                           primary_index=_OSML_MODELS_PRIMARY_INDEX, schema_name=self._db_name)
+ 
+         model_obj = OpensourceModels(is_default_partition_value=self._is_default_partition_value_fit,
+                                      partition_file_prefix=self._model_file_name_prefix,
+                                      fit_partition_columns_non_default=self._fit_partition_colums_non_default,
+                                      model=self.modelObj,
+                                      pos_args=self.pos_args,
+                                      key_args=self.kwargs)
+ 
+         # Save the model object to a file to be used in save_byom() for writing to the
+         # Vantage table.
+         file_name = os.path.join(self._tdml_tmp_dir, "deployed_file.pickle")
+         with open(file_name, "wb+") as fp:
+             fp.write(pickle.dumps(model_obj))
+ 
+         try:
+             save_byom(model_id=model_name,
+                       model_file=file_name,
+                       table_name=_OSML_MODELS_TABLE_NAME,
+                       additional_columns_types=_OSML_ADDITIONAL_COLUMN_TYPES,
+                       additional_columns={"package": self.OPENSOURCE_PACKAGE_NAME.value})
+         except TeradataMlException as ex:
+             model_exists_msg = Messages.get_message(MessageCodes.MODEL_ALREADY_EXISTS, model_name)
+             if not replace_if_exists and model_exists_msg == str(ex):
+                 raise
+             elif replace_if_exists and model_exists_msg == str(ex):
+                 # Delete the model from the models table and save it again.
+                 delete_byom(model_id=model_name, table_name=_OSML_MODELS_TABLE_NAME)
+                 save_byom(model_id=model_name,
+                           model_file=file_name,
+                           table_name=_OSML_MODELS_TABLE_NAME,
+                           additional_columns_types=_OSML_ADDITIONAL_COLUMN_TYPES,
+                           additional_columns={"package": self.OPENSOURCE_PACKAGE_NAME.value})
+             else:
+                 raise
+         finally:
+             os.remove(file_name)
+ 
+     @classmethod
+     def _deploy(cls, model_name, model, replace_if_exists=False):
+         """
+         Internal function to create an instance of the class using the model and deploy
+         the model to Vantage.
+         """
+         cls._validate_model_supportability(model=model)
+ 
+         obj = cls(model=model)
+         # Load the model file onto the Vantage node as the file can be used in
+         # predict or other operations.
+         obj._install_initial_model_file()
+ 
+         obj._save_model(model_name, replace_if_exists)
+ 
+         return obj
+ 
+     @classmethod
+     def _load(cls, model_name):
+         """
+         Internal function to load the model corresponding to the package (like sklearn etc.)
+         from Vantage to the client using retrieve_byom() and create an instance of the class
+         if the model is from the same package.
+         """
+         try:
+             model = retrieve_byom(model_id=model_name, table_name=_OSML_MODELS_TABLE_NAME,
+                                   return_addition_columns=True)
+         except TeradataMlException as ex:
+             # Not showing the table name in the error message as it is an internal table.
+             part_msg = f"Model '{model_name}' not found in the table "
+             if part_msg in str(ex):
+                 raise TeradataMlException(Messages.get_message(MessageCodes.MODEL_NOT_FOUND, model_name, ""),
+                                           MessageCodes.MODEL_NOT_FOUND)
+             raise
+ 
+         model_vals_list = model.get_values()[0]
+         # The row has two elements (besides the model name index column):
+         # - the pickled model object with fields such as is_default_partition_value,
+         #   partition_file_prefix, model, etc.
+         # - the package name.
+         model_obj = pickle.loads(model_vals_list[0])
+         model = model_obj.model
+         package = model_vals_list[1]
+ 
+         if package != cls.OPENSOURCE_PACKAGE_NAME.value:
+             # Raise an error if trying to access a model of a different package.
+             raise TeradataMlException(Messages.get_message(MessageCodes.MODEL_NOT_FOUND, model_name,
+                                                            f". Requested model is from '{package}' package"),
+                                       MessageCodes.MODEL_NOT_FOUND)
+ 
+         if isinstance(model, pd.DataFrame):
+             # Create a new instance of the class and set the model object on the instance.
+             # Instantiation can take only a model, not a model object. Hence, passing one of
+             # the models from the pandas df; modelObj and other fields are updated later.
+             obj = cls(model=model.iloc[1, 2])
+             obj.modelObj = model
+             obj._fit_partition_unique_values = [lst[:len(lst)-1] for lst in model.values.tolist()]
+         else:
+             obj = cls(model=model)
+ 
+         obj._model_file_name_prefix = model_obj.partition_file_prefix
+         obj._is_default_partition_value_fit = model_obj.is_default_partition_value
+         obj._fit_partition_colums_non_default = model_obj.fit_partition_columns_non_default
+         obj.pos_args = model_obj.pos_args
+         obj.kwargs = model_obj.key_args
+ 
+         # Load the model file onto the Vantage node as the file can be used in
+         # predict or other operations.
+         obj._install_initial_model_file()
+ 
+         return obj
+ 
+     def deploy(self, model_name, replace_if_exists=False):
+         """
+         DESCRIPTION:
+             Deploys the model held by the interface object to Vantage.
+ 
+         PARAMETERS:
+             model_name:
+                 Required Argument.
+                 Specifies the unique name of the model to be deployed.
+                 Types: str
+ 
+             replace_if_exists:
+                 Optional Argument.
+                 Specifies whether to replace the model if a model with the same name already
+                 exists in Vantage. If this argument is set to False and a model with the same
+                 name already exists, then the function raises an exception.
+                 Default Value: False
+                 Types: bool
+ 
+         RETURNS:
+             The opensource object wrapper.
+ 
+         RAISES:
+             TeradataMlException if a model with "model_name" already exists and the argument
+             "replace_if_exists" is set to False.
+ 
+         EXAMPLES:
+             >>> from teradataml import td_sklearn
+             >>> model = td_sklearn.LinearRegression(normalize=True)
+             >>> model
+             LinearRegression(normalize=True)
+ 
+             # Example 1: Deploy the model held by the interface object to Vantage.
+             >>> lin_reg = model.deploy("linreg_model_ver_2")
+             Model is saved.
+             >>> lin_reg
+             LinearRegression(normalize=True)
+ 
+             # Example 2: Deploy the model held by the interface object to Vantage with the
+             # same name as that of a model that already exists in Vantage.
+             >>> lin_reg = model.deploy("linreg_model_ver_2", replace_if_exists=True)
+             Model is deleted.
+             Model is saved.
+             >>> lin_reg
+             LinearRegression(normalize=True)
+         """
+         # Install the model file into Vantage, if not installed.
+         self._install_initial_model_file()
+ 
+         self._save_model(model_name, replace_if_exists)
+         return self
+ 
+ 
+ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
+ 
+     OPENSOURCE_PACKAGE_NAME = OpenSourcePackage.SKLEARN
+ 
+     def __init__(self, model=None, module_name=None, class_name=None, pos_args=None, kwargs=None):
+         super().__init__(model=model, module_name=module_name, class_name=class_name,
+                          pos_args=pos_args, kwargs=kwargs)
+ 
+         self._initialize_variables()
+         if model:
+             self.modelObj = model
+             self.module_name = model.__module__.split("._")[0]
+             self.class_name = model.__class__.__name__
+             # __dict__ gets all the arguments as a dictionary, including default ones and
+             # positional args.
+             self.kwargs = model.__dict__
+             self.pos_args = tuple()  # Kept empty as all arguments are moved to kwargs.
+         else:
+             self._initialize_object()
+ 
+     def __repr__(self):
+         if self._is_default_partition_value_fit:
+             # Single model use case.
+             return self.modelObj.__repr__()
+ 
+         pd.set_option("display.expand_frame_repr", None)
+         pd.set_option("display.max_colwidth", None)
+         opt = self.modelObj.__repr__()
+         pd.reset_option("display.expand_frame_repr")
+         pd.reset_option("display.max_colwidth")
+         return opt
+ 
+     def _validate_args_and_get_data(self, X=None, y=None, groups=None, kwargs={},
+                                     skip_either_or_that=False):
+         """
+         Internal function to validate arguments passed to the exposed opensource APIs and
+         return the parent DataFrame, feature columns, label columns, group columns and data
+         partition columns.
+         """
+         _validate_opensource_func_args(X=X, y=y, groups=groups,
+                                        fit_partition_cols=self._fit_partition_colums_non_default,
+                                        kwargs=kwargs,
+                                        skip_either_or_that=skip_either_or_that)
+         return _derive_df_and_required_columns(X=X, y=y, groups=groups, kwargs=kwargs,
+                                                fit_partition_cols=self._fit_partition_colums_non_default)
+ 
+     def _initialize_object(self):
+         """
+         Internal function to initialize the sklearn object from the module name and class name.
+         """
+         # Needed when writing imported modules to the generated file. TODO: Remove later.
+         imported_args = {}
+         # If there are any objects of class `_SkLearnObjectWrapper`, they are converted to
+         # the corresponding sklearn objects.
+         new_sklearn_pos_args = self.modify_args(None, self.pos_args, imported_args)
+         new_sklearn_kwargs = self.modify_args(None, self.kwargs, imported_args)
+ 
+         # Create the model object from the new positional and keyword arguments.
+         class_obj = getattr(import_module(self.module_name), self.class_name)
+         if new_sklearn_pos_args:
+             self.modelObj = class_obj(*new_sklearn_pos_args, **new_sklearn_kwargs)
+         else:
+             self.modelObj = class_obj(**new_sklearn_kwargs)
+ 
+         # All arguments are moved to kwargs and pos_args is kept empty.
+         # Might help in the set_params() bug fix.
+         self.pos_args = tuple()
+         _arguments = self.modelObj.__dict__
+ 
+         if hasattr(self.modelObj, "get_params"):
+             # Update only the kwargs that are both in modelObj and get_params(), as some
+             # classes keep other internal variables in __dict__ as well.
+             # Hence, filter them using get_params().
+             for k, v in _arguments.items():
+                 if type(v).__name__ in ["function", "generator"]:
+                     # TODO: ELE-6351: Skip adding functions and generators to kwargs as these
+                     # are not supported yet due to a pickling issue.
+                     continue
+                 if k in self.get_params():
+                     self.kwargs[k] = v
+         else:
+             # Model selection classes will not have `get_params`, in which case modelObj's
+             # __dict__ is saved as kwargs.
+             self.kwargs = _arguments
+ 
+     def _initialize_variables(self):
+         """
+         Internal function to initialize the variables used in this class.
+         """
+         self.feature_names_in_ = None
+         self._table_name_prefix = "td_sklearn_"
+         self._model_file_name_prefix = _generate_new_name(type="file")
+         self.model_file_paths_local = set()
+ 
+         self._fit_execution_time = None
+         self._fit_predict_execution_time = None
+         self._partial_fit_execution_time = None
+         self._predict_execution_time = None
+         self._transform_execution_time = None
+         self._score_execution_time = None
+ 
+         # Set to the partition columns when training is done with partition columns.
+         self._fit_partition_colums_non_default = None
+ 
+         self._is_model_installed = False
+         self._fit_partition_unique_values = [[self._default_data_partition_value]]
+ 
+     def modify_args(self, fp1, arg, imported_args):
+         """
+         Internal function to recursively (if "arg" is a list/tuple/dict) check if any
+         opensourceML sklearn object is present in the argument "arg" and convert it to the
+         corresponding sklearn object.
+         This function can also be used to write import statements to a file (if "fp1" is not
+         None). It updates the "imported_args" dictionary with the imported module and class
+         name to avoid importing the same module and class again when writing to the file.
+         This is useful when we want to generate a script from a template file.
+         Pass None as "fp1" if we don't want to write to a file and just want to convert the
+         opensourceML sklearn object to the corresponding sklearn object.
+         """
+         if isinstance(arg, type(self)):
+             imported_tuple = (arg.module_name, arg.class_name)
+             already_imported = imported_args.get(imported_tuple, False)
+             if not already_imported:
+                 imported_args[imported_tuple] = True
+                 if fp1:
+                     fp1.write(f"from {arg.module_name} import {arg.class_name}\n")
+             self.modify_args(fp1, arg.pos_args, imported_args)
+             self.modify_args(fp1, arg.kwargs, imported_args)
+             return arg.modelObj
+         elif isinstance(arg, list):
+             return [self.modify_args(fp1, val, imported_args) for val in arg]
+         elif isinstance(arg, tuple):
+             return tuple(self.modify_args(fp1, val, imported_args) for val in arg)
+         elif type(arg).__name__ == "generator":
+             # Raising an exception as generator objects can't be pickled.
+             # TODO: ELE-6351 - Find ways to pickle generator objects later.
+             raise ValueError("Generator type/iterator is not supported for any argument. "
+                              "Support will be added later.")
+         elif type(arg).__name__ == "function":
+             # Raising an exception as functions/lambda functions can't be pickled.
+             # TODO: ELE-6351 - Find ways to pickle functions later.
+             raise ValueError("Functions are not supported for any argument. "
+                              "Support will be added later.")
+         elif isinstance(arg, dict):
+             return dict(
+                 (
+                     self.modify_args(fp1, k, imported_args),
+                     self.modify_args(fp1, v, imported_args),
+                 )
+                 for k, v in arg.items()
+             )
+         else:
+             return arg
+ 
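+     # For instance (illustrative), if base is a td_sklearn wrapper around
+     # DecisionTreeClassifier, modify_args(None, {"estimator": base}, {}) returns
+     # {"estimator": base.modelObj}, i.e. the underlying sklearn estimator.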
+     def _install_initial_model_file(self):
+         """
+         If model file(s) is/are not installed in Vantage, then install it/them.
+         """
+         if isinstance(self.modelObj, pd.DataFrame):
+             # Get the list of unique partition values and corresponding model objects as a dict.
+             partition_values_model_dict = {}
+             obj_list = self.modelObj.values.tolist()
+             for lst in obj_list:
+                 partition_values_model_dict[tuple(lst[:len(lst)-1])] = lst[len(lst)-1]
+ 
+         for partition in self._fit_partition_unique_values:
+             # Create a new file whose name carries the partition values and
+             # dump the sklearn object into it. Finally, install the file in Vantage.
+             partition_join = "_".join([str(x) for x in partition])
+             file_name = f"{self._model_file_name_prefix}_{partition_join}"
+             # Replace '-' with '_' as '-' can't be present in a file identifier.
+             # This replacement is needed because partition values can be negative.
+             file_name = file_name.replace("-", "_")
+             full_file_name = os.path.join(self._tdml_tmp_dir, file_name)
+             with open(full_file_name, "wb+") as fp:
+                 # Write the sklearn object to the file.
+                 if isinstance(self.modelObj, pd.DataFrame):
+                     # If there are multiple models, write the model corresponding to the
+                     # partition value.
+                     fp.write(pickle.dumps(partition_values_model_dict[tuple(partition)]))
+                 else:
+                     fp.write(pickle.dumps(self.modelObj))
+             self.model_file_paths_local.add(file_name)
+ 
+             self._install_script_file(file_identifier=file_name,
+                                       file_name=file_name,
+                                       is_binary=True,
+                                       file_location=self._tdml_tmp_dir)
+ 
+             if self._is_lake_system:
+                 # Need to pass env_name along with file_name for cleaning up the files in the env.
+                 obj = f"{self._env.env_name}::{file_name}"
+                 if installed_model_files[obj] == 0:
+                     # Add to GC the first time the model file (along with the env name) is encountered.
+                     installed_model_files[obj] = 1
+                     GarbageCollector._add_to_garbagecollector(object_name=obj,
+                                                               object_type=TeradataConstants.TERADATA_APPLY)
+             else:
+                 if installed_model_files[file_name] == 0:
+                     # Add to GC the first time the model file is encountered.
+                     installed_model_files[file_name] = 1
+                     GarbageCollector._add_to_garbagecollector(object_name=file_name,
+                                                               object_type=TeradataConstants.TERADATA_SCRIPT)
+ 
+         self._is_model_installed = True
+ 
+     def _run_fit_related_functions(self,
+                                    data,
+                                    feature_columns,
+                                    label_columns,
+                                    partition_columns,
+                                    func,
+                                    classes=None):
+         """
+         Internal function to run the fit() and partial_fit() functions.
+         """
+         label_columns = self._get_columns_as_list(label_columns)
+ 
+         data, new_partition_columns = self._get_data_and_data_partition_columns(data,
+                                                                                 feature_columns,
+                                                                                 label_columns,
+                                                                                 partition_columns)
+ 
+         model_type = BLOB() if self._is_lake_system else CLOB()
+         return_types = [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
+                         for col in new_partition_columns] + [("model", model_type)]
+ 
+         file_name = "sklearn_fit.py"
+         self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
+ 
+         if classes:
+             class_type = type(classes[0]).__name__
+             classes = "--".join([str(x) for x in classes])
+         else:
+             classes = str(None)
+             class_type = str(None)
+ 
+         partition_indices, partition_types, new_partition_columns = \
+             self._get_partition_col_indices_and_types(data, new_partition_columns)
+ 
+         # db_name is applicable for the enterprise system.
+         db_file_name = file_name if self._is_lake_system else f"./{self._db_name}/{file_name}"
+         py_exc = UtilFuncs._get_python_execution_path()
+         script_command = f"{py_exc} {db_file_name} {func} {len(feature_columns)} "\
+                          f"{len(label_columns)} {partition_indices} {partition_types} "\
+                          f"{self._model_file_name_prefix} {classes} {class_type} {self._is_lake_system}"
+ 
+         # Get the unique values in the partitioning columns.
+         self._fit_partition_unique_values = data.drop_duplicate(new_partition_columns).get_values()
+ 
+         self._install_initial_model_file()
+ 
+         self._model_data = self._run_script(data, script_command, new_partition_columns,
+                                             return_types)
+ 
+         # Extract the sklearn object(s) from the model data depending on the number of
+         # unique partitioning values.
+         self.extract_sklearn_obj(n_unique_partitions=len(self._fit_partition_unique_values),
+                                  n_partition_cols=len(new_partition_columns))
+ 
+         # These label column types are needed in prediction.
+         self._fit_label_columns_types = [data._td_column_names_and_sqlalchemy_types[l_c.lower()]
+                                          for l_c in label_columns]
+ 
855
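
With two feature columns and one label column on an enterprise system, the command handed to the Script table operator looks roughly like the following (the interpreter path, index/type encodings, and model prefix are placeholders, not values taken from the source):

    python3 ./mydb/sklearn_fit.py fit 2 1 <partition_indices> <partition_types> <model_prefix> None None False
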
+     def partial_fit(self, X=None, y=None, classes=None, **kwargs):
+         """
+         Please check the description in Docs/OpensourceML/sklearn.py.
+         """
+         st_time = time.time()
+
+         # "classes" argument validation.
+         arg_info_matrix = []
+         arg_info_matrix.append(["classes", classes, True, list])
+         _Validators._validate_function_arguments(arg_info_matrix)
+
+         self._is_default_partition_value_fit = True  # False when the user provides partition columns.
+
+         data, feature_columns, label_columns, _, partition_columns = \
+             self._validate_args_and_get_data(X=X, y=y, groups=None, kwargs=kwargs)
+
+         if partition_columns:
+             self._is_default_partition_value_fit = False
+             self._fit_partition_colums_non_default = partition_columns
+
+         self._run_fit_related_functions(data,
+                                         feature_columns,
+                                         label_columns,
+                                         partition_columns,
+                                         inspect.stack()[0][3],
+                                         classes)
+
+         self._partial_fit_execution_time = time.time() - st_time
+
+         return self
+
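
A minimal usage sketch of incremental training (assuming the td_sklearn interface referenced in the docstring; the DataFrame and column names are placeholders):

    from teradataml import td_sklearn as osml

    clf = osml.SGDClassifier()
    # "classes" must list every label the model will ever see, since a
    # given batch may contain only a subset of them.
    clf = clf.partial_fit(data=batch_df, feature_columns=["f1", "f2"],
                          label_columns="label", classes=[0, 1])
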
+     def fit(self, X=None, y=None, **kwargs):
+         """
+         Please check the description in Docs/OpensourceML/sklearn.py.
+         """
+         st_time = time.time()
+
+         self._is_default_partition_value_fit = True  # False when the user provides partition columns.
+
+         data, feature_columns, label_columns, _, partition_columns = \
+             self._validate_args_and_get_data(X=X, y=y, groups=None, kwargs=kwargs)
+
+         if partition_columns:
+             self._is_default_partition_value_fit = False
+             self._fit_partition_colums_non_default = partition_columns
+
+         self._run_fit_related_functions(data,
+                                         feature_columns,
+                                         label_columns,
+                                         partition_columns,
+                                         inspect.stack()[0][3])
+
+         self._fit_execution_time = time.time() - st_time
+
+         return self
+
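
Passing partition_columns switches fit() to the multi-model path, training one sklearn model per distinct partition value (a sketch; names are placeholders):

    lr = osml.LinearRegression()
    lr = lr.fit(data=train_df, feature_columns=["f1", "f2"],
                label_columns="y", partition_columns="region")
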
+     def set_params(self, **params):
+         """
+         Please check the description in Docs/OpensourceML/sklearn.py.
+         """
+         for key, val in params.items():
+             self.kwargs[key] = val
+
+         # Initialize with the new arguments and return the class/model object.
+         # set_params takes only keyword arguments, no positional arguments.
+         self.__init__(None, self.module_name, self.class_name, tuple(), self.kwargs)
+         return self
+
+     # get_params() will be executed through __getattr__().
+
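
Both calls mirror sklearn's estimator API (a sketch; `dt` is a placeholder for a wrapper object):

    dt = dt.set_params(max_depth=5, random_state=42)   # re-initializes the wrapped model
    params = dt.get_params()                           # forwarded to sklearn via __getattr__()
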
+     # @_validate_fit_run
+     def __getattr__(self, name):
+         def __run_transform(*c, **kwargs):
+             kwargs["name"] = name
+             return self._transform(*c, **kwargs)
+
+         def __run_function_needing_all_rows(*c, **kwargs):
+             kwargs["name"] = name
+             return self._run_function_needing_all_rows(*c, **kwargs)
+
+         def __run_kneighbors(*c, **kwargs):
+             kwargs["name"] = name
+             return self._run_neighbors(*c, **kwargs)
+
+         if name in ["score", "aic", "bic", "perplexity"]:
+             # TODO: ELE-6352 - Implement error_norm() function later.
+             return __run_function_needing_all_rows
+
+         if name in ["kneighbors",
+                     "radius_neighbors",
+                     "kneighbors_graph",
+                     "radius_neighbors_graph"]:
+             return __run_kneighbors
+
+         # Note: "kneighbors_graph" and "radius_neighbors_graph" below are already
+         # handled by the check above, so their entries here are never reached.
+         if name in ["predict",
+                     "transform",
+                     "inverse_transform",
+                     "predict_proba",
+                     "predict_log_proba",
+                     "decision_function",
+                     "score_samples",
+                     "decision_path",
+                     "apply",
+                     "cost_complexity_pruning_path",
+                     "gibbs",
+                     "kneighbors_graph",
+                     "radius_neighbors_graph",
+                     "mahalanobis",
+                     "correct_covariance",
+                     "reweight_covariance",
+                     "path"]:
+             return __run_transform
+
+         return super().__getattr__(name)
+
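
In effect, attribute access decides where a call runs (a sketch; `model` and the column names are placeholders):

    model.predict(data=df, feature_columns=["f1", "f2"])     # routed to _transform()
    model.score(data=df, feature_columns=["f1", "f2"],
                label_columns="y")                           # routed to _run_function_needing_all_rows()
    model.kneighbors(data=df, feature_columns=["f1", "f2"])  # routed to _run_neighbors()
    model.get_params()                                       # falls through to the wrapped sklearn object
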
+     def _get_return_columns_for_function_(self,
+                                           data,
+                                           feature_columns,
+                                           label_columns,
+                                           func_name,
+                                           n_partitions,
+                                           kwargs):
+         """
+         Internal function to return the list of column names and their sqlalchemy types
+         to be used in the return_types of Script.
+         """
+         if func_name == "fit_predict":
+             # Get return columns using label_columns.
+             return [(f"{self.class_name.lower()}_{func_name}_{(i + 1)}",
+                      data._td_column_names_and_sqlalchemy_types[col.lower()])
+                     for i, col in enumerate(label_columns)]
+         if func_name == "predict":
+             # Return predict columns using either label_columns (if provided) or
+             # self._fit_label_columns_types (if the function was trained using label
+             # columns). Otherwise, run predict on ten rows of data after this if
+             # condition to get the number of columns and their types.
+             if label_columns:
+                 return [(f"{self.class_name.lower()}_{func_name}_{(i + 1)}",
+                          data._td_column_names_and_sqlalchemy_types[col.lower()])
+                         for i, col in enumerate(label_columns)]
+             if self._fit_label_columns_types:
+                 return [(f"{self.class_name.lower()}_{func_name}_{(i + 1)}", col_type)
+                         for i, col_type in enumerate(self._fit_label_columns_types)]
+
+         data = data.select(feature_columns + label_columns)
+
+         ## If the function is not `fit_predict`, execute the transform/other function on
+         ## the client with ten rows of data to get the number of columns in the return
+         ## clause and their Vantage types.
+         n_f = len(feature_columns)
+         n_c = len(label_columns)
+
+         # For multiple partitions, modelObj is a dataframe and getattr(modelObj, func_name)
+         # fails. Since only the number of columns and their types are needed here, use just
+         # one model out of all of them.
+         if n_partitions == 1:
+             # Single model case.
+             skl_obj = self.modelObj
+         else:
+             # Multi model case.
+             skl_obj = self.modelObj.iloc[0]["model"]
+
+         ten_row_data = data.head(10).get_values()
+         X = numpy.array(ten_row_data)
+         if label_columns:
+             y = X[:, n_f : n_f + n_c]
+             X = X[:, :n_f]
+             # predict() now also takes 'y' so that it can return the labels from the script.
+             # Skip 'y' in the local run if the function does not accept it. Generally, 'y'
+             # is passed to return y along with the actual output.
+             try:
+                 trans_opt = getattr(skl_obj, func_name)(X, y, **kwargs)
+             except TypeError:
+                 # Functions which do not accept 'y', like predict_proba(), raise an error
+                 # like "predict_proba() takes 2 positional arguments but 3 were given".
+                 trans_opt = getattr(skl_obj, func_name)(X, **kwargs)
+         else:
+             trans_opt = getattr(skl_obj, func_name)(X, **kwargs)
+
+         if func_name == "path":
+             raise NotImplementedError(
+                 "path() returns a tuple of ndarrays of different shapes. Not implemented yet."
+             )
+
+         # This import matches scipy version 1.10.x on the local machine, as teradataml
+         # does not impose restrictions on this package in setup.py. TODO
+         from scipy.sparse import csr_matrix
+
+         if isinstance(trans_opt, csr_matrix):
+             no_of_columns = trans_opt.get_shape()[1]
+             trans_opt = trans_opt.toarray()
+         elif isinstance(trans_opt, dict):
+             raise NotImplementedError(f"Output returns dictionary {trans_opt}. NOT implemented yet.")
+         elif isinstance(trans_opt[0], numpy.ndarray) \
+                 or isinstance(trans_opt[0], list) \
+                 or isinstance(trans_opt[0], tuple):
+             no_of_columns = len(trans_opt[0])
+         else:
+             no_of_columns = 1
+
+         # Special handling is required for the cross_decomposition classes' transform
+         # function, which also takes label columns. In that case, the output is a tuple
+         # of numpy arrays - x_scores and y_scores. If label columns are not provided,
+         # only x_scores are returned.
+         if self.module_name == "sklearn.cross_decomposition" and func_name == "transform":
+             # For cross_decomposition, the output is a tuple of arrays when label columns
+             # are provided along with feature columns for the transform function. In this
+             # case, concatenate the arrays and return the column names accordingly.
+             if isinstance(trans_opt, tuple):  # tuple when label_columns is provided.
+                 assert trans_opt[0].shape == trans_opt[1].shape, \
+                     "Output arrays should be of the same shape when transform/fit_transform "\
+                     "is run with label columns for cross_decomposition classes."
+                 first_cols = [f"x_scores_{(i + 1)}" for i in range(trans_opt[0].shape[1])]
+                 second_cols = [f"y_scores_{(i + 1)}" for i in range(trans_opt[1].shape[1])]
+                 no_of_columns = trans_opt[0].shape[1] + trans_opt[1].shape[1]
+                 col_names = first_cols + second_cols
+
+                 trans_opt = numpy.concatenate(trans_opt, axis=1)
+             else:
+                 assert isinstance(trans_opt, numpy.ndarray), \
+                     "When transform/fit_transform is run without label columns for "\
+                     "cross_decomposition classes, the output should be a numpy array."
+                 no_of_columns = trans_opt.shape[1]
+                 col_names = [f"x_scores_{(i + 1)}" for i in range(trans_opt.shape[1])]
+         else:
+             # Generate the list of new column names.
+             col_names = [f"{self.class_name.lower()}_{func_name}_{(i + 1)}" for i in range(no_of_columns)]
+
+         # Get new column sqlalchemy types for the pandas df columns of the transform output.
+         opt_pd = pd.DataFrame(trans_opt)
+
+         # If the dtype is datetime64 and a timezone is present, map the column to
+         # TIMESTAMP(timezone=True); otherwise map it according to the default mapping.
+         col_types = [TIMESTAMP(timezone=True)
+                      if pt.is_datetime64_ns_dtype(opt_pd.dtypes[key]) and (opt_pd[col_name].dt.tz is not None)
+                      else _get_sqlalchemy_mapping(str(opt_pd.dtypes[key]))
+                      for key, col_name in enumerate(list(opt_pd.columns))]
+
+         return [(c_name, c_type) for c_name, c_type in zip(col_names, col_types)]
+
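
As an illustration, predict_proba() on a three-class classifier would be probed locally on ten rows and produce return types roughly like the following (the class name and the FLOAT mapping are assumed):

    [("randomforestclassifier_predict_proba_1", FLOAT()),
     ("randomforestclassifier_predict_proba_2", FLOAT()),
     ("randomforestclassifier_predict_proba_3", FLOAT())]
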
+     @_validate_fit_run
+     def _run_function_needing_all_rows(self, X=None, y=None, **kwargs):
+         """
+         Internal function to run functions like score, aic and bic, which need all rows
+         and return a single floating point number as the result.
+         """
+         st_time = time.time()
+
+         assert kwargs["name"], "function name should be passed."
+         func_name = kwargs["name"]
+
+         # Remove 'name' to pass other kwargs to script. TODO: Not passing it now.
+         kwargs.pop("name")
+
+         data, feature_columns, label_columns, _, partition_columns = \
+             self._validate_args_and_get_data(X=X, y=y, groups=None, kwargs=kwargs)
+
+         label_columns = self._get_columns_as_list(label_columns)
+
+         data, new_partition_columns = self._get_data_and_data_partition_columns(data,
+                                                                                 feature_columns,
+                                                                                 label_columns,
+                                                                                 partition_columns)
+
+         file_name = "sklearn_score.py"
+         self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
+
+         script_file_path = f"{file_name}" if self._is_lake_system \
+             else f"./{self._db_name}/{file_name}"
+
+         partition_indices, partition_types, new_partition_columns = \
+             self._get_partition_col_indices_and_types(data, new_partition_columns)
+
+         self._validate_unique_partition_values(data, new_partition_columns)
+
+         py_exc = UtilFuncs._get_python_execution_path()
+         script_command = f"{py_exc} {script_file_path} {func_name} {len(feature_columns)} "\
+                          f"{len(label_columns)} {partition_indices} {partition_types} "\
+                          f"{self._model_file_name_prefix} {self._is_lake_system}"
+
+         # score, aic and bic return float values.
+         return_types = [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
+                         for col in new_partition_columns] + [(func_name, FLOAT())]
+
+         self._install_initial_model_file()
+
+         opt = self._run_script(data, script_command, new_partition_columns, return_types)
+
+         self._score_execution_time = time.time() - st_time
+
+         if self._is_default_partition_value_fit:
+             # For the single model case, the partition column is internally generated and
+             # there is no point in returning it to the user.
+             return opt.select(func_name)
+
+         return opt
+
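
Unlike sklearn, score() therefore comes back as a teradataml DataFrame rather than a Python float (a sketch; names are placeholders):

    acc = clf.score(data=test_df, feature_columns=["f1", "f2"], label_columns="label")
    # acc has a single FLOAT column named "score"; partition columns are
    # included as well in the multi-model case.
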
+     @_validate_fit_run
+     def _transform(self, X=None, y=None, **kwargs):
+         """
+         Internal function to run predict/transform and similar functions, which return
+         multiple columns. Unlike sklearn's functions, which return only the output data,
+         this function returns the input row data along with the generated columns.
+         """
+         st_time = time.time()
+
+         assert kwargs["name"], "function name should be passed."
+         func_name = kwargs["name"]
+
+         # Remove 'name' to pass other kwargs to script. TODO: Not passing it now.
+         kwargs.pop("name")
+
+         data, feature_columns, label_columns, _, partition_columns = \
+             self._validate_args_and_get_data(X=X, y=y, groups=None, kwargs=kwargs)
+
+         data, new_partition_columns = self._get_data_and_data_partition_columns(data,
+                                                                                 feature_columns,
+                                                                                 label_columns,
+                                                                                 partition_columns)
+
+         # Since kwargs are passed on to the transform function, remove the unrelated
+         # teradataml-specific arguments from kwargs.
+         for arg_name in ("data", "feature_columns", "group_columns",
+                          "partition_columns", "label_columns"):
+             kwargs.pop(arg_name, None)
+
+         file_name = "sklearn_transform.py"
+         self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
+
+         script_file_path = f"{file_name}" if self._is_lake_system \
+             else f"./{self._db_name}/{file_name}"
+
+         partition_indices, partition_types, new_partition_columns = \
+             self._get_partition_col_indices_and_types(data, new_partition_columns)
+
+         self._validate_unique_partition_values(data, new_partition_columns)
+
+         py_exc = UtilFuncs._get_python_execution_path()
+         script_command = f"{py_exc} {script_file_path} {func_name} {len(feature_columns)} "\
+                          f"{len(label_columns)} {partition_indices} {partition_types} "\
+                          f"{self._model_file_name_prefix} {self._is_lake_system}"
+
+         # Return the feature columns as well, along with the transformed columns, because
+         # the mapping of feature columns to transformed columns is not known.
+         return_types = [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
+                         for col in (new_partition_columns + feature_columns)]
+         if func_name in ["predict", "decision_function"] and label_columns:
+             return_types += [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
+                              for col in label_columns]
+         return_types += self._get_return_columns_for_function_(data,
+                                                                feature_columns,
+                                                                label_columns,
+                                                                func_name,
+                                                                len(new_partition_columns),
+                                                                kwargs)
+
+         # Install model files before running sklearn_transform.py.
+         self._install_initial_model_file()
+
+         opt = self._run_script(data, script_command, new_partition_columns, return_types)
+
+         self._transform_execution_time = time.time() - st_time
+
+         return self._get_returning_df(opt, new_partition_columns, return_types)
+
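
So a predict() call returns the input columns alongside the generated ones (a sketch; names are placeholders):

    out = clf.predict(data=test_df, feature_columns=["f1", "f2"], label_columns="label")
    # Columns: f1, f2, label, sgdclassifier_predict_1
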
+     def fit_predict(self, X=None, y=None, **kwargs):
+         """
+         Please check the description in Docs/OpensourceML/sklearn.py.
+         """
+         st_time = time.time()
+
+         self._is_default_partition_value_fit = True  # False when the user provides partition columns.
+
+         data, feature_columns, label_columns, _, partition_columns = \
+             self._validate_args_and_get_data(X=X, y=y, groups=None, kwargs=kwargs)
+
+         if partition_columns:
+             self._is_default_partition_value_fit = False
+
+         data, new_partition_columns = self._get_data_and_data_partition_columns(data,
+                                                                                 feature_columns,
+                                                                                 label_columns,
+                                                                                 partition_columns)
+
+         # Also return label_columns if the user provides them in the function call.
+         return_types = [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
+                         for col in (new_partition_columns + feature_columns + label_columns)]
+
+         func_name = inspect.stack()[0][3]
+         if label_columns:
+             return_types += self._get_return_columns_for_function_(data,
+                                                                    feature_columns,
+                                                                    label_columns,
+                                                                    func_name,
+                                                                    len(new_partition_columns),
+                                                                    {})
+         else:
+             # If there are no label_columns, there will be only one predicted column.
+             return_types += [(f"{self.class_name.lower()}_{func_name}_1", FLOAT())]
+
+         file_name = "sklearn_fit_predict.py"
+         self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
+
+         partition_indices, partition_types, new_partition_columns = \
+             self._get_partition_col_indices_and_types(data, new_partition_columns)
+
+         script_file_name = f"{file_name}" if self._is_lake_system \
+             else f"./{self._db_name}/{file_name}"
+         py_exc = UtilFuncs._get_python_execution_path()
+         script_command = f"{py_exc} {script_file_name} {len(feature_columns)} "\
+                          f"{len(label_columns)} {partition_indices} {partition_types} "\
+                          f"{self._model_file_name_prefix} {self._is_lake_system}"
+
+         # Get unique values in partitioning columns.
+         self._fit_partition_unique_values = data.drop_duplicate(new_partition_columns).get_values()
+
+         self._install_initial_model_file()
+
+         opt = self._run_script(data, script_command, new_partition_columns, return_types)
+
+         self._fit_predict_execution_time = time.time() - st_time
+
+         if self._is_default_partition_value_fit:
+             # For the single model case, the partition column is internally generated and
+             # there is no point in returning it to the user.
+
+             # Extract columns from return types.
+             returning_cols = [col[0] for col in return_types[len(new_partition_columns):]]
+             return opt.select(returning_cols)
+
+         return opt
+
+     def fit_transform(self, X=None, y=None, **kwargs):
+         """
+         Please check the description in Docs/OpensourceML/sklearn.py.
+         """
+         # 'y' is not needed for transform().
+         fit_obj = self.fit(X, y, **kwargs)
+         kwargs["label_columns"] = None
+         return fit_obj.transform(X, None, **kwargs)
+
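
A sketch of the combined call (assuming the td_sklearn interface; the scaler and names are placeholders):

    scaler = osml.StandardScaler()
    scaled_df = scaler.fit_transform(data=train_df, feature_columns=["f1", "f2"])
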
+     @_validate_fit_run
+     def _run_neighbors(self, X=None, **kwargs):
+         """
+         Internal function to run functions like kneighbors, radius_neighbors,
+         kneighbors_graph and radius_neighbors_graph, which return multiple columns.
+         Unlike sklearn's functions, which return only the output data, this function
+         returns the input row data along with the generated columns.
+         """
+         assert kwargs["name"], "function name should be passed."
+         func_name = kwargs["name"]
+         kwargs.pop("name")
+
+         if self.module_name != "sklearn.neighbors":
+             raise AttributeError(f"{self.module_name}.{self.class_name} does not have {func_name}() method.")
+
+         data = kwargs.get("data", None)
+         partition_columns = kwargs.get("partition_columns", None)
+
+         if not X and not partition_columns and not data:
+             # If data is not passed, then run on the client only.
+             # TODO: decide whether to run from client or from Vantage.
+             opt = super().__getattr__(func_name)(**kwargs)
+             # Import from the public scipy.sparse namespace (scipy.sparse.csr is a
+             # deprecated private module path).
+             from scipy.sparse import csr_matrix
+             if isinstance(opt, csr_matrix):
+                 return opt.toarray()
+             return opt
+
+         self._is_default_partition_value_fit = True  # False when the user provides partition columns.
+
+         data, feature_columns, _, _, new_partition_columns = \
+             self._validate_args_and_get_data(X=X, y=None, groups=None, kwargs=kwargs,
+                                              skip_either_or_that=True)
+
+         # Remove the teradataml-specific arguments; the remaining kwargs are forwarded
+         # to the sklearn function.
+         kwargs.pop("data", None)
+         kwargs.pop("partition_columns", None)
+         kwargs.pop("feature_columns", None)
+         kwargs.pop("label_columns", None)
+
+         if partition_columns:
+             # "partition_columns" has already been removed from kwargs above.
+             self._is_default_partition_value_fit = False
+
+         # Generate the new partition column name.
+         data, new_partition_columns = self._get_data_and_data_partition_columns(data,
+                                                                                 feature_columns,
+                                                                                 [],
+                                                                                 partition_columns)
+
+         args_str = self._get_kwargs_str(kwargs)
+
+         file_name = "sklearn_neighbors.py"
+         self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
+
+         script_file_path = f"{file_name}" if self._is_lake_system \
+             else f"./{self._db_name}/{file_name}"
+
+         # Return the feature columns as well, along with the new columns.
+         return_types = [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
+                         for col in (new_partition_columns + feature_columns)]
+
+         # `return_distance` is needed as the result is a tuple of two arrays when it is True.
+         return_distance = kwargs.get("return_distance", True)  # Default value is True.
+
+         # Though the new columns hold numpy arrays, they are returned as strings.
+         # TODO: Update to proper columns if requested later.
+         if func_name in ['kneighbors', 'radius_neighbors']:
+             if return_distance:
+                 return_types += [("neigh_dist", VARCHAR())]
+             return_types += [("neigh_ind", VARCHAR())]
+         elif func_name in ['kneighbors_graph', 'radius_neighbors_graph']:
+             return_types += [("A", VARCHAR())]
+         else:
+             return_types += [("output", VARCHAR())]
+
+         partition_indices, partition_types, new_partition_columns = \
+             self._get_partition_col_indices_and_types(data, new_partition_columns)
+
+         py_exc = UtilFuncs._get_python_execution_path()
+         script_command = f"{py_exc} {script_file_path} {func_name} {len(feature_columns)} "\
+                          f"{partition_indices} {partition_types} {self._model_file_name_prefix} {self._is_lake_system} "\
+                          f"{args_str}"
+
+         # Get unique values in partitioning columns.
+         self._fit_partition_unique_values = data.drop_duplicate(new_partition_columns).get_values()
+
+         self._install_initial_model_file()
+
+         opt = self._run_script(data, script_command, new_partition_columns, return_types)
+
+         return self._get_returning_df(opt, new_partition_columns, return_types)
+
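
Per the return types above, the neighbor arrays arrive as VARCHAR columns (a sketch; names are placeholders):

    nn = osml.NearestNeighbors(n_neighbors=3)
    nn = nn.fit(data=train_df, feature_columns=["f1", "f2"])
    out = nn.kneighbors(data=train_df, feature_columns=["f1", "f2"])
    # Columns: f1, f2, neigh_dist (VARCHAR), neigh_ind (VARCHAR)
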
+     def split(self, X=None, y=None, groups=None, **kwargs):
+         """
+         Please check the description in Docs/OpensourceML/sklearn.py.
+         """
+         opt = self._run_model_selection("split", X=X, y=y, groups=groups,
+                                         skip_either_or_that=True, kwargs=kwargs)
+
+         # Get the number of splits in the result DataFrame.
+         n_splits = opt.drop_duplicate("split_id").shape[0]
+
+         data = kwargs.get("data", None)
+         feature_columns = kwargs.get("feature_columns", [])
+         label_columns = self._get_columns_as_list(kwargs.get("label_columns", []))
+
+         # If X (and y) are passed instead of "data", derive feature_columns and
+         # label_columns from their columns.
+         partition_columns = kwargs.get("partition_columns", [])
+         feature_columns = [col for col in X.columns if col not in partition_columns] \
+             if X and not data and not feature_columns else feature_columns
+         label_columns = y.columns if y and not data and not label_columns else label_columns
+
+         # Return an iterator of the train and test dataframes for each split.
+         for i in range(1, n_splits + 1):
+             train_df = opt[(opt.split_id == i) & (opt.data_type == "train")]\
+                 .select(partition_columns + feature_columns + label_columns)
+             train_df._index_label = None
+             test_df = opt[(opt.split_id == i) & (opt.data_type == "test")]\
+                 .select(partition_columns + feature_columns + label_columns)
+             test_df._index_label = None
+
+             yield train_df, test_df
+
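
The generator mirrors sklearn's splitter API but yields teradataml DataFrames (a sketch; names are placeholders):

    kf = osml.KFold(n_splits=3)
    for train_df, test_df in kf.split(data=df, feature_columns=["f1", "f2"],
                                      label_columns="y"):
        print(train_df.shape, test_df.shape)
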
+     def get_n_splits(self, X=None, y=None, groups=None, **kwargs):
+         """
+         Please check the description in Docs/OpensourceML/sklearn.py.
+         """
+         return self._run_model_selection("get_n_splits", X=X, y=y, groups=groups,
+                                          skip_either_or_that=True, kwargs=kwargs)
+
+     def _run_model_selection(self,
+                              func_name,
+                              X=None,
+                              y=None,
+                              groups=None,
+                              skip_either_or_that=False,
+                              kwargs=None):
+         """
+         Internal function to run functions like split() and get_n_splits() of the
+         model_selection module.
+         - get_n_splits() returns the number of splits as a value, not as a teradataml DataFrame.
+         - split() returns a teradataml DataFrame containing train and test data for each
+           split (with partition information added if the argument "partition_columns"
+           is provided).
+         """
+         # Avoid sharing a mutable default argument across calls.
+         kwargs = kwargs if kwargs is not None else {}
+
+         if self.module_name != "sklearn.model_selection":
+             raise AttributeError(f"{self.module_name}.{self.class_name} does not "
+                                  f"have {func_name}() method.")
+
+         data = kwargs.get("data", None)
+
+         if not X and not y and not groups and not data:
+             # If data is not passed, then run on the client only.
+             # TODO: decide whether to run from client or from Vantage.
+             return super().__getattr__(func_name)()
+
+         self._is_default_partition_value_fit = True  # False when the user provides partition columns.
+
+         data, feature_columns, label_columns, group_columns, partition_columns = \
+             self._validate_args_and_get_data(X=X, y=y, groups=groups, kwargs=kwargs,
+                                              skip_either_or_that=skip_either_or_that)
+
+         if partition_columns:
+             self._is_default_partition_value_fit = False
+
+         data, new_partition_columns = self._get_data_and_data_partition_columns(data,
+                                                                                 feature_columns,
+                                                                                 label_columns,
+                                                                                 partition_columns,
+                                                                                 group_columns)
+
+         file_name = "sklearn_model_selection_split.py"
+         self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
+
+         script_file_path = f"{file_name}" if self._is_lake_system \
+             else f"./{self._db_name}/{file_name}"
+
+         if func_name == "split":
+             # The data needs to be generated as train and test splits.
+             # split_id - the column which will be used to identify the split.
+             # data_type - the column which will be used to identify whether the row is
+             #             a train or a test row.
+             return_types = [("split_id", INTEGER()), ("data_type", VARCHAR())]
+             # Returning feature columns and label columns as well.
+             return_types += [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
+                              for col in (feature_columns + label_columns)]
+         else:
+             # Return VARCHAR by default.
+             # VARCHAR is returned even for functions like `get_n_splits`, which can return
+             # large integers like `4998813702034726525205100` for the `LeavePOut` class
+             # (when the argument `p` is 28 and the number of data rows is 100), since such
+             # values cannot fit in Vantage's INTEGER type.
+             return_types = [(func_name, VARCHAR())]
+
+         return_types = [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
+                         for col in new_partition_columns] + return_types
+
+         partition_indices, partition_types, new_partition_columns = \
+             self._get_partition_col_indices_and_types(data, new_partition_columns)
+
+         py_exc = UtilFuncs._get_python_execution_path()
+         script_command = f"{py_exc} {script_file_path} {func_name} {len(feature_columns)} "\
+                          f"{len(label_columns)} {len(group_columns)} {partition_indices} {partition_types} "\
+                          f"{self._model_file_name_prefix} {self._is_lake_system}"
+
+         # Get unique values in partitioning columns.
+         self._fit_partition_unique_values = data.drop_duplicate(new_partition_columns).get_values()
+
+         self._install_initial_model_file()
+
+         opt = self._run_script(data, script_command, new_partition_columns, return_types)
+
+         if func_name == "get_n_splits" and not partition_columns:
+             # Return the number of splits as a value, not as a dataframe.
+             vals = execute_sql("select {} from {}".format(func_name, opt._table_name))
+             opt = vals.fetchall()[0][0]
+
+             # A VARCHAR is returned by the script. Convert it to int.
+             return int(opt)
+
+         return opt
+
+     def _get_returning_df(self, script_df, partition_column, returns):
+         """
+         Internal function to return the teradataml DataFrame without the partition_column.
+         """
+         if self._is_default_partition_value_fit:
+             # For the single model case, the partition column is internally generated
+             # and there is no point in returning it to the user.
+
+             # Extract columns from return types.
+             returning_cols = [col[0] for col in returns[len(partition_column):]]
+             return script_df.select(returning_cols)
+         return script_df
+
+
+ class _SKLearnFunctionWrapper(_GenericObjectWrapper):
+     def __init__(self, module_name, func_name):
+         super().__init__()
+         self.__module_name = module_name
+         self.__func_name = func_name
+         self.__params = None
+         self.__data_args = OrderedDict()
+         self._model_file_name = _generate_new_name(type="file_function", extension="py")
+
+     def __call__(self, **kwargs):
+         """
+         Run the function with all the arguments passed from the
+         `td_sklearn.<function_name>` function.
+         """
+         __data_columns = []
+
+         partition_cols = self._get_columns_as_list(kwargs.get("partition_columns", None))
+         if partition_cols:
+             kwargs.pop("partition_columns")
+
+         # Separate the dataframe related arguments and their column names from the actual kwargs.
+         for k, v in kwargs.items():
+             if isinstance(v, DataFrame):
+                 # All dataframes should be a select of the parent dataframe.
+                 _validate_df_query_type(v, "select", k)
+
+                 # Save all columns of the dataframe related arguments.
+                 __data_columns.extend(v.columns)
+
+                 self.__data_args[k] = v
+
+         # Get the common parent dataframe from all dataframes.
+         self.__tdml_df = DataFrameUtils()._get_common_parent_df_from_dataframes(list(self.__data_args.values()))
+
+         self._validate_existence_of_partition_columns(partition_cols, self.__tdml_df.columns)
+
+         self.__tdml_df = self.__tdml_df.select(__data_columns + partition_cols)
+
+         self.__tdml_df, partition_cols = self._get_data_and_data_partition_columns(self.__tdml_df,
+                                                                                    __data_columns,
+                                                                                    [],
+                                                                                    partition_cols)
+
+         # Prepare a string of data arguments with the name, the indices where the columns
+         # of that argument reside, and the types of each of the columns.
+         data_args_str = self._prepare_data_args_string(kwargs)
+
+         self.__params = kwargs
+
+         # Get indices and types of partition_columns.
+         idxs, types, partition_cols = self._get_partition_col_indices_and_types(self.__tdml_df,
+                                                                                 partition_cols)
+
+         script_file_path = f"{self._model_file_name}" if self._is_lake_system \
+             else f"./{self._db_name}/{self._model_file_name}"
+         py_exc = UtilFuncs._get_python_execution_path()
+         script_command = f"{py_exc} {script_file_path} {idxs} {types} {data_args_str}"
+
+         return_types = [(col, self.__tdml_df._td_column_names_and_sqlalchemy_types[col.lower()])
+                         for col in partition_cols] + [(self.__func_name, CLOB())]
+
+         # Generate a new file in the .teradataml directory and install it in Vantage.
+         self._prepare_and_install_file()
+
+         self._model_data = self._run_script(self.__tdml_df, script_command, partition_cols, return_types)
+         self._model_data._index_label = None
+
+         fit_partition_unique_values = self.__tdml_df.drop_duplicate(partition_cols).get_values()
+
+         self.extract_sklearn_obj(n_unique_partitions=len(fit_partition_unique_values),
+                                  n_partition_cols=len(partition_cols))
+
+         # File cleanup after processing.
+         os.remove(self._model_file_local)
+         remove_file(file_identifier=self._model_file_name.split(".")[0], suppress_output=True,
+                     force_remove=True)
+
+         return self.modelObj
+
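
Given the constructor and __call__() above, a wrapped module-level function is driven roughly like this (the module, function, and DataFrame names are placeholders; note that every DataFrame keyword argument must be a select() of the same parent DataFrame):

    wrapper = _SKLearnFunctionWrapper("sklearn.metrics", "accuracy_score")
    result = wrapper(y_true=df.select(["label"]), y_pred=df.select(["pred"]))
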
+     def _prepare_data_args_string(self, kwargs):
+         """
+         Prepare a string encoding, for every DataFrame argument, its name, the indices
+         of its columns in the parent dataframe, and the types of those columns.
+         """
+         data_args_str = []
+         for arg_name in list(self.__data_args.keys()):
+             # Remove DataFrame arguments from kwargs, which will be passed to Script.
+             kwargs.pop(arg_name)
+
+             # Get the column indices and their types for each dataframe from the parent dataframe.
+             _indices, _types, _ = self._get_partition_col_indices_and_types(self.__tdml_df,
+                                                                             self.__data_args[arg_name].columns)
+
+             # Format: "<arg_name>-<comma separated indices>-<comma separated types>"
+             data_args_str.append(f"{arg_name}-{_indices}-{_types}")
+
+         # Format: "<arg_name>-<indices>-<types>--<arg_name>-<indices>-<types>"
+         return "--".join(data_args_str)
+
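
With the two hypothetical DataFrame arguments from the sketch above, the encoded string would look roughly like this (the exact index/type encoding produced by _get_partition_col_indices_and_types is assumed here):

    "y_true-0-INTEGER--y_pred-1-INTEGER"
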
+     def _validate_existence_of_partition_columns(self, partition_columns, all_columns):
+         """
+         Validate that the columns in the "partition_columns" argument are present in at
+         least one of the given dataframes.
+         """
+         invalid_part_cols = [c for c in partition_columns if c not in all_columns]
+
+         if invalid_part_cols:
+             raise ValueError(Messages.get_message(MessageCodes.INVALID_PARTITIONING_COLS,
+                                                   ", ".join(invalid_part_cols),
+                                                   "', '".join(list(self.__data_args.keys())))
+                              )
+
+     def _prepare_and_install_file(self):
+         """
+         Prepare the function script file from the template file and install it in Vantage.
+         """
+         with open(os.path.join(self._scripts_path, "sklearn_function.template")) as fp:
+             script_data = fp.read()
+         script_data = script_data.replace("<module_name>", self.__module_name).\
+             replace("<func_name>", self.__func_name).replace("<params>", json.dumps(self.__params))
+
+         self._model_file_local = os.path.join(self._tdml_tmp_dir, self._model_file_name)
+
+         with open(self._model_file_local, "w") as fp:
+             fp.write(script_data)
+
+         self._install_script_file(file_identifier=self._model_file_name.split(".")[0],
+                                   file_name=self._model_file_name,
+                                   file_location=self._tdml_tmp_dir)
+