teradataml 20.0.0.1__py3-none-any.whl → 20.0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic. Click here for more details.

Files changed (240)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +306 -0
  4. teradataml/__init__.py +10 -3
  5. teradataml/_version.py +1 -1
  6. teradataml/analytics/__init__.py +3 -2
  7. teradataml/analytics/analytic_function_executor.py +299 -16
  8. teradataml/analytics/analytic_query_generator.py +92 -0
  9. teradataml/analytics/byom/__init__.py +3 -2
  10. teradataml/analytics/json_parser/metadata.py +13 -3
  11. teradataml/analytics/json_parser/utils.py +13 -6
  12. teradataml/analytics/meta_class.py +40 -1
  13. teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
  14. teradataml/analytics/sqle/__init__.py +11 -2
  15. teradataml/analytics/table_operator/__init__.py +4 -3
  16. teradataml/analytics/uaf/__init__.py +21 -2
  17. teradataml/analytics/utils.py +66 -1
  18. teradataml/analytics/valib.py +1 -1
  19. teradataml/automl/__init__.py +1502 -323
  20. teradataml/automl/custom_json_utils.py +139 -61
  21. teradataml/automl/data_preparation.py +247 -307
  22. teradataml/automl/data_transformation.py +32 -12
  23. teradataml/automl/feature_engineering.py +325 -86
  24. teradataml/automl/model_evaluation.py +44 -35
  25. teradataml/automl/model_training.py +122 -153
  26. teradataml/catalog/byom.py +8 -8
  27. teradataml/clients/pkce_client.py +1 -1
  28. teradataml/common/__init__.py +2 -1
  29. teradataml/common/constants.py +72 -0
  30. teradataml/common/deprecations.py +13 -7
  31. teradataml/common/garbagecollector.py +152 -120
  32. teradataml/common/messagecodes.py +11 -2
  33. teradataml/common/messages.py +4 -1
  34. teradataml/common/sqlbundle.py +26 -4
  35. teradataml/common/utils.py +225 -14
  36. teradataml/common/wrapper_utils.py +1 -1
  37. teradataml/context/context.py +82 -2
  38. teradataml/data/SQL_Fundamentals.pdf +0 -0
  39. teradataml/data/complaints_test_tokenized.csv +353 -0
  40. teradataml/data/complaints_tokens_model.csv +348 -0
  41. teradataml/data/covid_confirm_sd.csv +83 -0
  42. teradataml/data/dataframe_example.json +27 -1
  43. teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
  44. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
  45. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
  46. teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
  47. teradataml/data/docs/sqle/docs_17_20/Shap.py +203 -0
  48. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
  49. teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
  50. teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
  51. teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
  52. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  53. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
  54. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
  55. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
  56. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
  57. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
  58. teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
  59. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
  60. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
  61. teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
  62. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
  63. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  64. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
  65. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
  66. teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
  67. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
  68. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
  69. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
  70. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
  71. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
  72. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
  73. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
  74. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
  75. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
  76. teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
  77. teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
  78. teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
  79. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  80. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
  81. teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
  82. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
  83. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
  84. teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
  85. teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
  86. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  87. teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
  88. teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
  89. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
  90. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  91. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
  92. teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
  93. teradataml/data/dwt2d_dataTable.csv +65 -0
  94. teradataml/data/dwt_dataTable.csv +8 -0
  95. teradataml/data/dwt_filterTable.csv +3 -0
  96. teradataml/data/finance_data4.csv +13 -0
  97. teradataml/data/grocery_transaction.csv +19 -0
  98. teradataml/data/idwt2d_dataTable.csv +5 -0
  99. teradataml/data/idwt_dataTable.csv +8 -0
  100. teradataml/data/idwt_filterTable.csv +3 -0
  101. teradataml/data/interval_data.csv +5 -0
  102. teradataml/data/jsons/paired_functions.json +14 -0
  103. teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
  104. teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
  105. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
  106. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
  107. teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
  108. teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
  109. teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
  110. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
  111. teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
  112. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  113. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  114. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  115. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  116. teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
  117. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  118. teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
  119. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
  120. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
  121. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
  122. teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
  123. teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
  124. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
  125. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
  126. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
  127. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
  128. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
  129. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
  130. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
  131. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
  132. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
  133. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
  134. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
  135. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
  136. teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
  137. teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
  138. teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
  139. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
  140. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
  141. teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
  142. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
  143. teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
  144. teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
  145. teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
  146. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
  147. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
  148. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
  149. teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
  150. teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
  151. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
  152. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
  153. teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
  154. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
  155. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
  156. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
  157. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
  158. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
  159. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
  160. teradataml/data/load_example_data.py +8 -2
  161. teradataml/data/medical_readings.csv +101 -0
  162. teradataml/data/naivebayestextclassifier_example.json +1 -1
  163. teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
  164. teradataml/data/patient_profile.csv +101 -0
  165. teradataml/data/peppers.png +0 -0
  166. teradataml/data/real_values.csv +14 -0
  167. teradataml/data/sax_example.json +8 -0
  168. teradataml/data/scripts/deploy_script.py +1 -1
  169. teradataml/data/scripts/lightgbm/dataset.template +157 -0
  170. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
  171. teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
  172. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
  173. teradataml/data/scripts/sklearn/sklearn_fit.py +194 -160
  174. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
  175. teradataml/data/scripts/sklearn/sklearn_function.template +34 -16
  176. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
  177. teradataml/data/scripts/sklearn/sklearn_neighbors.py +1 -1
  178. teradataml/data/scripts/sklearn/sklearn_score.py +12 -3
  179. teradataml/data/scripts/sklearn/sklearn_transform.py +162 -24
  180. teradataml/data/star_pivot.csv +8 -0
  181. teradataml/data/target_udt_data.csv +8 -0
  182. teradataml/data/templates/open_source_ml.json +3 -1
  183. teradataml/data/teradataml_example.json +20 -1
  184. teradataml/data/timestamp_data.csv +4 -0
  185. teradataml/data/titanic_dataset_unpivoted.csv +19 -0
  186. teradataml/data/uaf_example.json +55 -1
  187. teradataml/data/unpivot_example.json +15 -0
  188. teradataml/data/url_data.csv +9 -0
  189. teradataml/data/vectordistance_example.json +4 -0
  190. teradataml/data/windowdfft.csv +16 -0
  191. teradataml/dataframe/copy_to.py +1 -1
  192. teradataml/dataframe/data_transfer.py +5 -3
  193. teradataml/dataframe/dataframe.py +1002 -201
  194. teradataml/dataframe/fastload.py +3 -3
  195. teradataml/dataframe/functions.py +867 -0
  196. teradataml/dataframe/row.py +160 -0
  197. teradataml/dataframe/setop.py +2 -2
  198. teradataml/dataframe/sql.py +840 -33
  199. teradataml/dataframe/window.py +1 -1
  200. teradataml/dbutils/dbutils.py +878 -34
  201. teradataml/dbutils/filemgr.py +48 -1
  202. teradataml/geospatial/geodataframe.py +1 -1
  203. teradataml/geospatial/geodataframecolumn.py +1 -1
  204. teradataml/hyperparameter_tuner/optimizer.py +13 -13
  205. teradataml/lib/aed_0_1.dll +0 -0
  206. teradataml/opensource/__init__.py +1 -1
  207. teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
  208. teradataml/opensource/_lightgbm.py +950 -0
  209. teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
  210. teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
  211. teradataml/opensource/sklearn/__init__.py +0 -1
  212. teradataml/opensource/sklearn/_sklearn_wrapper.py +1019 -574
  213. teradataml/options/__init__.py +9 -23
  214. teradataml/options/configure.py +42 -4
  215. teradataml/options/display.py +2 -2
  216. teradataml/plot/axis.py +4 -4
  217. teradataml/scriptmgmt/UserEnv.py +13 -9
  218. teradataml/scriptmgmt/lls_utils.py +77 -23
  219. teradataml/store/__init__.py +13 -0
  220. teradataml/store/feature_store/__init__.py +0 -0
  221. teradataml/store/feature_store/constants.py +291 -0
  222. teradataml/store/feature_store/feature_store.py +2223 -0
  223. teradataml/store/feature_store/models.py +1505 -0
  224. teradataml/store/vector_store/__init__.py +1586 -0
  225. teradataml/table_operators/Script.py +2 -2
  226. teradataml/table_operators/TableOperator.py +106 -20
  227. teradataml/table_operators/query_generator.py +3 -0
  228. teradataml/table_operators/table_operator_query_generator.py +3 -1
  229. teradataml/table_operators/table_operator_util.py +102 -56
  230. teradataml/table_operators/templates/dataframe_register.template +69 -0
  231. teradataml/table_operators/templates/dataframe_udf.template +63 -0
  232. teradataml/telemetry_utils/__init__.py +0 -0
  233. teradataml/telemetry_utils/queryband.py +52 -0
  234. teradataml/utils/dtypes.py +4 -2
  235. teradataml/utils/validators.py +34 -2
  236. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +311 -3
  237. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +240 -157
  238. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
  239. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
  240. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
@@ -31,39 +31,83 @@ def splitter(strr, delim=",", convert_to="str"):
31
31
  return []
32
32
  return [convert_to_type(i, convert_to) for i in strr.split(delim)]
33
33
 
34
+ def should_convert(t_val, py_type):
35
+ """
36
+ Function to check type of value and whether value is nan and infinity.
37
+ """
38
+ return not isinstance(t_val, eval(py_type)) and not math.isinf(t_val) and not math.isnan(t_val)
39
+
40
+ def convert_value(t_val, py_type):
41
+ """
42
+ Function to convert value to specified python type.
43
+ """
44
+ return convert_to_type(t_val, py_type) if should_convert(t_val, py_type) else t_val
45
+
34
46
  # Process output returned by sklearn function.
35
- def get_output_data(trans_values, func_name, model_obj, n_c_labels):
36
- # Converting sparse matrix to dense array as sparse matrices are NOT
47
+ def get_output_data(trans_values, func_name, model_obj, n_c_labels, n_out_columns):
48
+ # Converting sparse matrix to dense array as sparse matrices are NOT
37
49
  # supported in Vantage.
38
50
  module_name = model_obj.__module__.split("._")[0]
39
51
 
40
- if type(trans_values).__name__ in ["csr_matrix", "csc_matrix"]:
41
- trans_values = trans_values.toarray()
52
+ # Converting the translated values into the corresponding return column's
53
+ # python type.
54
+ if (func_name == "decision_path" or return_columns_python_types is None \
55
+ or not isinstance(trans_values, np.ndarray)):
56
+ trans_values_list = trans_values
57
+ else:
58
+ # Conversion.....
59
+ trans_values_list = []
60
+ for trans_value in trans_values.tolist():
61
+ if not isinstance(trans_value, list):
62
+ trans_value = [trans_value]
63
+
64
+ converted_list = []
65
+ if len(return_columns_python_types) == len(trans_value):
66
+ for t_val, py_type in zip(trans_value, return_columns_python_types):
67
+ converted_list.append(convert_value(t_val, py_type))
68
+ ## transform() is having only 1 python return type, But it actually returns more than 1 column
69
+ else:
70
+ for t_val in trans_value:
71
+ converted_list.append(convert_value(t_val, "".join(return_columns_python_types)))
72
+
73
+ trans_values_list.append(converted_list)
74
+
75
+ if type(trans_values_list).__name__ in ["csr_matrix", "csc_matrix"]:
76
+ trans_values_list = trans_values_list.toarray()
42
77
 
43
78
  if module_name == "sklearn.cross_decomposition" and n_c_labels > 0 and func_name == "transform":
44
79
  # For cross_decomposition, output is a tuple of arrays when label columns are provided
45
80
  # along with feature columns for transform function. In this case, concatenate the
46
81
  # arrays and return the combined values.
47
- if isinstance(trans_values, tuple):
48
- return np.concatenate(trans_values, axis=1).tolist()[0]
82
+ if isinstance(trans_values_list, tuple):
83
+ return np.concatenate(trans_values_list, axis=1).tolist()[0]
49
84
 
50
- if isinstance(trans_values[0], np.ndarray) \
51
- or isinstance(trans_values[0], list) \
52
- or isinstance(trans_values[0], tuple):
85
+ if isinstance(trans_values_list[0], np.ndarray) \
86
+ or isinstance(trans_values_list[0], list) \
87
+ or isinstance(trans_values_list[0], tuple):
53
88
  # Here, the value returned by sklearn function is list type.
54
- opt_list = list(trans_values[0])
89
+ opt_list = list(trans_values_list[0])
90
+
91
+ if len(opt_list) < n_out_columns:
92
+ # If the output list is less than the required number of columns, append
93
+ # empty strings to the list.
94
+ opt_list += [""] * (n_out_columns - len(opt_list))
95
+
55
96
  if func_name == "inverse_transform" and type(model_obj).__name__ == "MultiLabelBinarizer":
56
97
  # output array "trans_values[0]" may not be of same size. It should be of
57
98
  # maximum size of `model.classes_`
58
99
  # Append None to last elements.
59
100
  if len(opt_list) < len(model_obj.classes_):
60
101
  opt_list += [""] * (len(model_obj.classes_) - len(opt_list))
102
+
61
103
  return opt_list
62
- return [trans_values[0]]
104
+
105
+ # Only one element is returned by the function.
106
+ return [trans_values_list[0]]
63
107
 
64
108
  # Arguments to the Script
65
- if len(sys.argv) != 8:
66
- # 8 arguments command line arguments should be passed to this file.
109
+ if len(sys.argv) != 10:
110
+ # 10 arguments command line arguments should be passed to this file.
67
111
  # 1: file to be run
68
112
  # 2. function name (Eg. predict, fit etc)
69
113
  # 3. No of feature columns.
@@ -71,13 +115,17 @@ if len(sys.argv) != 8:
71
115
  # 5. Comma separated indices of partition columns.
72
116
  # 6. Comma separated types of all the data columns.
73
117
  # 7. Model file prefix to generated model file using partition columns.
74
- # 8. Flag to check the system type. True, means Lake, Enterprise otherwise.
75
- sys.exit("8 arguments should be passed to this file - file to be run, function name, "\
76
- "no of feature columns, no of class labels, comma separated indices of partition "\
77
- "columns, comma separated types of all columns, model file prefix to generate model "\
78
- "file using partition columns and flag to check lake or enterprise.")
118
+ # 8. Number of columns to be returned by the sklearn's transform function.
119
+ # 9. Flag to check the system type. True, means Lake, Enterprise otherwise.
120
+ # 10. Python types of returned/transformed columns.
121
+ sys.exit("10 arguments should be passed to this file - file to be run, function name, "\
122
+ "no of feature columns, no of class labels, comma separated indices of partition "\
123
+ "columns, comma separated types of all columns, model file prefix to generate model "\
124
+ "file using partition columns, number of columns to be returnd by sklearn's "\
125
+ "transform function, flag to check lake or enterprise and Python types of "\
126
+ "returned/transfromed columns.")
79
127
 
80
- is_lake_system = eval(sys.argv[7])
128
+ is_lake_system = eval(sys.argv[8])
81
129
  if not is_lake_system:
82
130
  db = sys.argv[0].split("/")[1]
83
131
  func_name = sys.argv[1]
@@ -86,17 +134,29 @@ n_c_labels = int(sys.argv[3])
86
134
  data_column_types = splitter(sys.argv[5], delim="--")
87
135
  data_partition_column_indices = splitter(sys.argv[4], convert_to="int") # indices are integers.
88
136
  model_file_prefix = sys.argv[6]
137
+ # sys.argv[9] will contain a string of python datatypes with '--'
138
+ # separator OR a single datatype OR None in string format.
139
+ ret_col_argv = sys.argv[9]
140
+ if ret_col_argv == "None":
141
+ return_columns_python_types = eval(ret_col_argv)
142
+ else:
143
+ return_columns_python_types = splitter(ret_col_argv, delim="--")
144
+
145
+ no_of_output_columns = int(sys.argv[7])
89
146
 
90
147
  data_partition_column_types = [data_column_types[idx] for idx in data_partition_column_indices]
91
148
 
92
149
  model = None
93
150
  data_partition_column_values = []
94
151
 
152
+ all_rows_input = []
153
+
95
154
  # Data Format:
96
155
  # feature1, feature2, ..., featuren, label1, label2, ... labelk, data_partition_column1, ...,
97
156
  # data_partition_columnn.
98
157
  # label is optional (it is present when label_exists is not "None")
99
158
 
159
+ model_name = ""
100
160
  while 1:
101
161
  try:
102
162
  line = input()
@@ -128,9 +188,48 @@ while 1:
128
188
  sys.exit("Model file is not installed in Vantage.")
129
189
 
130
190
  f_ = values[:n_f_cols]
191
+
192
+ model_name = model.__class__.__name__
193
+ np_func_list = ["ClassifierChain", "EllipticEnvelope", "MinCovDet",
194
+ "FeatureAgglomeration", "LabelBinarizer", "MultiLabelBinarizer",
195
+ "BernoulliRBM"]
196
+
197
+ # MissingIndicator's transform() and SimpleImputer's inverse_transform() requires processing
198
+ # the entire dataset simultaneously, rather than on a row-by-row basis.
199
+
200
+ # Error getting during row-by-row processing of MissingIndicator -
201
+ # "ValueError: MissingIndicator does not support data with dtype <U13.
202
+ # Please provide either a numeric array (with a floating point or
203
+ # integer dtype) or categorical data represented ei
204
+
205
+ # Error getting during row-by-row processing of SimpleImputer -
206
+ # "IndexError: index 3 is out of bounds for axis 1 with size 3".
207
+ if ((model_name == "MissingIndicator" and func_name == "transform") or \
208
+ (model_name == "SimpleImputer" and func_name == "inverse_transform") or \
209
+ (model_name in ["EllipticEnvelope", "MinCovDet"]
210
+ and func_name == "correct_covariance")):
211
+ all_rows_input.append(f_)
212
+ continue
213
+
214
+ f__ = np.array([f_]) if model_name in np_func_list else [f_]
215
+
216
+ # transform() function in these functions generate different number of output columns and
217
+ # NULLS/NaNs are appended to the end of the output.
218
+ # If we run inverse_transform() on these models, it will take same number of input columns
219
+ # with NULLs/NaNs but those NULLs/NaNs should be ignored while reading the input to
220
+ # inverse_transform() function.
221
+ models_with_all_null_in_last_cols = ["SelectFpr", "SelectFdr", "SelectFwe", "SelectFromModel", "RFECV"]
222
+ if model_name in models_with_all_null_in_last_cols and func_name == "inverse_transform":
223
+ # Remove NULLs/NaNs from the end of one input row.
224
+ _f = np.array([f_])
225
+ _f = _f[~np.isnan(_f)]
226
+ f__ = [_f.tolist()]
227
+
131
228
  if n_c_labels > 0:
132
229
  # Labels are present in last column.
133
230
  l_ = values[n_f_cols:n_f_cols+n_c_labels]
231
+
232
+ l__ = np.array([l_]) if model_name in np_func_list else [l_]
134
233
  # predict() now takes 'y' also for it to return the labels from script. Skipping 'y'
135
234
  # in function call. Generally, 'y' is passed to return y along with actual output.
136
235
  try:
@@ -138,23 +237,24 @@ while 1:
138
237
  # used 'in' in the if condition, as model.__module__ is giving
139
238
  # 'sklearn.cross_decomposition._pls'.
140
239
  if "cross_decomposition" in model.__module__:
141
- trans_values = getattr(model, func_name)(X=np.array([f_]), Y=np.array([l_]))
240
+ trans_values = getattr(model, func_name)(X=f__, Y=l__)
142
241
  else:
143
- trans_values = getattr(model, func_name)(X=np.array([f_]), y=np.array([l_]))
242
+ trans_values = getattr(model, func_name)(X=f__, y=l__)
144
243
 
145
244
  except TypeError as ex:
146
245
  # Function which does not accept 'y' like predict_proba() raises error like
147
246
  # "TypeError: predict_proba() takes 2 positional arguments but 3 were given".
148
- trans_values = getattr(model, func_name)(np.array([f_]))
247
+ trans_values = getattr(model, func_name)(f__)
149
248
  else:
150
249
  # If class labels do not exist in data, don't read labels, read just features.
151
- trans_values = getattr(model, func_name)(np.array([f_]))
250
+ trans_values = getattr(model, func_name)(f__)
152
251
 
153
252
  result_list = f_
154
253
  if n_c_labels > 0 and func_name in ["predict", "decision_function"]:
155
254
  result_list += l_
156
255
  result_list += get_output_data(trans_values=trans_values, func_name=func_name,
157
- model_obj=model, n_c_labels=n_c_labels)
256
+ model_obj=model, n_c_labels=n_c_labels,
257
+ n_out_columns=no_of_output_columns)
158
258
 
159
259
  for i, val in enumerate(result_list):
160
260
  if (val is None or (not isinstance(val, str) and (math.isnan(val) or math.isinf(val)))):
@@ -169,3 +269,41 @@ while 1:
169
269
 
170
270
  except EOFError: # Exit if reached EOF or CTRL-D
171
271
  break
272
+
273
+
274
+ # MissingIndicator and SimpleImputer need to process the entire dataset at once, instead of row by row.
275
+ # Hence, handling it outside of the while loop
276
+ if model_name == "MissingIndicator" and func_name == "transform" or \
277
+ (model_name == "SimpleImputer" and func_name == "inverse_transform"):
278
+ if model_name == "SimpleImputer":
279
+ all_rows_input = np.array(all_rows_input)
280
+ m_out = getattr(model, func_name)(all_rows_input)
281
+
282
+ if type(m_out).__name__ in ["csr_matrix", "csc_matrix"]:
283
+ m_out = m_out.toarray()
284
+
285
+ for j in range(len(all_rows_input)):
286
+ m_out_list = get_output_data(trans_values=[m_out[j]], func_name=func_name,
287
+ model_obj=model, n_c_labels=n_c_labels,
288
+ n_out_columns=no_of_output_columns)
289
+
290
+ result_list = list(all_rows_input[j]) + list(m_out_list)
291
+
292
+ for i, val in enumerate(result_list):
293
+ if (val is None or (not isinstance(val, str) and (math.isnan(val) or math.isinf(val)))):
294
+ result_list[i] = ""
295
+ # MissingIndicator returns boolean values. Convert them to 0/1.
296
+ elif val == False:
297
+ result_list[i] = 0
298
+ elif val == True:
299
+ result_list[i] = 1
300
+
301
+ print(*(data_partition_column_values + result_list), sep=DELIMITER)
302
+
303
+ ## correct_covariance() requires processing of all the input rows at the same time.
304
+ ## It returns the output dataset in (n_features, n_features) shape, i.e., based on
305
+ ## no. of columns.
306
+ if model_name in ["EllipticEnvelope", "MinCovDet"] and func_name == "correct_covariance":
307
+ result_list = model.correct_covariance(np.array(all_rows_input))
308
+ for l, vals in enumerate(result_list):
309
+ print(*(data_partition_column_values + vals.tolist()), sep=DELIMITER)
@@ -0,0 +1,8 @@
1
+ country,state,yr,qtr,sales,cogs,rating
2
+ USA,CA,2001,Q1,30,15,A
3
+ USA,NY,2001,Q1,45,25,D
4
+ USA,CA,2001,Q2,50,20,A
5
+ USA,CA,2001,Q2,5,5,B
6
+ Canada,ON,2001,Q2,10,0,B
7
+ Canada,BC,2001,Q3,15,0,A
8
+ Canada,BC,2001,Q3,10,0,A
@@ -0,0 +1,8 @@
1
+ id,array_col
2
+ 1,"3.33e-05,0.2,0.1"
3
+ 2,"0.5,0.4,0.42"
4
+ 3,"1,0.8,0.9"
5
+ 4,"0.01,0.4,0.2"
6
+ 5,"0.93,0.4,0.7"
7
+ 6,"0.83,0.3,0.6"
8
+ 7,"0.73,0.5,0.7"
@@ -2,7 +2,9 @@
2
2
  "env_specs": [
3
3
  {
4
4
  "env_name": "openml_env",
5
- "libs": "scikit-learn",
5
+ "libs": ["scikit-learn==1.5.1", "joblib==1.4.2", "numpy==1.23.5",
6
+ "scipy==1.14.0", "threadpoolctl==3.5.0", "lightgbm==3.3.3",
7
+ "pandas==2.2.3"],
6
8
  "desc": "DONT DELETE: OpenML environment"
7
9
  }
8
10
  ]
@@ -1348,6 +1348,25 @@
1348
1348
  "radio":"FLOAT",
1349
1349
  "newspaper":"FLOAT",
1350
1350
  "sales":"FLOAT"
1351
+ },
1352
+ "timestamp_data":{
1353
+ "id": "INTEGER",
1354
+ "timestamp_col": "VARCHAR(50)",
1355
+ "timestamp_col1": "BIGINT",
1356
+ "format_col": "VARCHAR(50)",
1357
+ "timezone_col": "VARCHAR(50)"
1358
+ },
1359
+ "interval_data":{
1360
+ "id": "INTEGER",
1361
+ "int_col": "BIGINT",
1362
+ "value_col": "VARCHAR(30)",
1363
+ "value_col1": "VARCHAR(30)",
1364
+ "str_col1": "VARCHAR(30)",
1365
+ "str_col2": "VARCHAR(30)"
1366
+ },
1367
+ "url_data": {
1368
+ "id": "INTEGER",
1369
+ "urls": "VARCHAR(60)",
1370
+ "part": "VARCHAR(20)"
1351
1371
  }
1352
-
1353
1372
  }
@@ -0,0 +1,4 @@
1
+ id,timestamp_col,timestamp_col1,format_col,timezone_col
2
+ 0,"2015-01-08 00:00:12.2",123456,"YYYY-MM-DD HH24:MI:SS.FF6","GMT"
3
+ 1,"2015-01-08 13:00",878986,"YYYY-MM-DD HH24:MI","America Pacific"
4
+ 2,"2015-01-08 00:00:12.2+10:00",45678910234,"YYYY-MM-DD HH24:MI:SS.FF6 TZH:TZM","GMT+10"
@@ -0,0 +1,19 @@
1
+ passenger,AttributeName,AttributeValue,survived
2
+ 61,pclass,3,0
3
+ 1000,pclass,3,1
4
+ 40,pclass,3,1
5
+ 21,pclass,2,0
6
+ 61,gender,male,0
7
+ 1000,gender,,1
8
+ 40,gender,female,1
9
+ 21,gender,male,0
10
+ 2,pclass,1,1
11
+ 16,pclass,2,1
12
+ 7,pclass,1,0
13
+ 2,gender,female,1
14
+ 16,gender,female,1
15
+ 7,gender,male,0
16
+ 10,pclass,2,1
17
+ 4,pclass,1,1
18
+ 10,gender,female,1
19
+ 4,gender,female,1
@@ -471,5 +471,59 @@
471
471
  "CONF_OFF_v": "FLOAT",
472
472
  "CONF_LOW_v": "FLOAT",
473
473
  "CONF_HI_v": "FLOAT"
474
+ },
475
+ "dwt_dataTable":{
476
+ "id": "INTEGER",
477
+ "rowi": "INTEGER",
478
+ "v": "FLOAT"
479
+ },
480
+ "dwt_filterTable":{
481
+ "id": "INTEGER",
482
+ "seq": "INTEGER",
483
+ "lo": "FLOAT",
484
+ "hi": "FLOAT"
485
+ },
486
+ "idwt_dataTable":{
487
+ "id": "INTEGER",
488
+ "rowi": "INTEGER",
489
+ "approx": "FLOAT",
490
+ "detail": "FLOAT"
491
+ },
492
+ "idwt_filterTable":{
493
+ "id": "INTEGER",
494
+ "seq": "INTEGER",
495
+ "lo": "FLOAT",
496
+ "hi": "FLOAT"
497
+ },
498
+ "dwt2d_dataTable":{
499
+ "id": "INTEGER",
500
+ "x": "INTEGER",
501
+ "y": "INTEGER",
502
+ "v": "FLOAT"
503
+ },
504
+ "idwt2d_dataTable":{
505
+ "id": "INTEGER",
506
+ "x": "INTEGER",
507
+ "y": "INTEGER",
508
+ "v": "FLOAT"
509
+ },
510
+ "covid_confirm_sd":{
511
+ "city": "VARCHAR(15)",
512
+ "row_axis": "INTEGER",
513
+ "cnumber": "INTEGER"
514
+ },
515
+ "real_values":{
516
+ "TD_TIMECODE": "TIMESTAMP(0)",
517
+ "id": "INTEGER",
518
+ "val": "FLOAT",
519
+ "<PTI_CLAUSE>": "(TIMESTAMP(0), DATE '2020-01-01', HOURS(1), COLUMNS(id), nonsequenced)"
520
+ },
521
+ "windowdfft":{
522
+ "id": "INTEGER",
523
+ "row_i": "INTEGER",
524
+ "v1": "FLOAT",
525
+ "v2": "FLOAT",
526
+ "v3": "FLOAT",
527
+ "v4": "FLOAT"
474
528
  }
475
- }
529
+ }
@@ -6,5 +6,20 @@
6
6
  "temp": "integer",
7
7
  "pressure": "real",
8
8
  "dewpoint": "varchar(30)"
9
+ },
10
+ "titanic_dataset_unpivoted":{
11
+ "passenger": "integer",
12
+ "AttributeName": "varchar(30)",
13
+ "AttributeValue": "varchar(30)",
14
+ "survived": "integer"
15
+ },
16
+ "star_pivot":{
17
+ "country": "varchar(30)",
18
+ "state": "varchar(30)",
19
+ "yr": "integer",
20
+ "qtr": "varchar(30)",
21
+ "sales": "integer",
22
+ "cogs": "integer",
23
+ "rating": "varchar(30)"
9
24
  }
10
25
  }
@@ -0,0 +1,9 @@
1
+ "id","urls","part"
2
+ 0,"http://example.com:8080/path","FILE"
3
+ 1,"ftp://example.net:21/path","PATH"
4
+ 2,"https://example.net/path4/path5/path6?query4=value4#fragment3","REF"
5
+ 3,"https://www.facebook.com","HOST"
6
+ 4,"https://teracloud-pod-services-pod-account-service.dummyvalue.production.pods.teracloud.ninja/v1/accounts/acc-dummyvalue/user-environment-service/api/v1/","QUERY"
7
+ 5,"http://pg.example.ml/path150#fragment90","AUTHORITY"
8
+ 6,"smtp://user:password@smtp.example.com:21/file.txt","USERINFO"
9
+ 7,"https://www.google.com","PROTOCOL"
@@ -22,5 +22,9 @@
22
22
  "CallDuration": "REAL",
23
23
  "DataCounter": "REAL",
24
24
  "SMS": "REAL"
25
+ },
26
+ "target_udt_data":{
27
+ "id": "INTEGER",
28
+ "array_col":"AIVector"
25
29
  }
26
30
  }
@@ -0,0 +1,16 @@
1
+ id,row_i,v1,v2,v3,v4
2
+ 3,1,0.0,1.4,1.0,1.0
3
+ 3,2,1.0,2.4,2.0,2.0
4
+ 3,3,2.0,3.4,3.0,3.0
5
+ 3,4,3.0,4.6,4.0,4.0
6
+ 3,5,0.0,5.9,5.0,5.0
7
+ 3,6,1.0,6.7,6.0,6.0
8
+ 3,7,2.0,7.7,7.0,7.0
9
+ 3,8,3.0,8.7,8.0,8.0
10
+ 3,9,0.0,9.9,9.0,9.0
11
+ 3,10,1.0,10.2,10.0,10.0
12
+ 3,11,2.0,11.2,11.0,11.0
13
+ 3,12,3.0,12.2,12.0,12.0
14
+ 3,13,1.0,10.2,13.0,13.0
15
+ 3,14,2.0,11.2,14.0,14.0
16
+ 3,15,3.0,12.2,15.0,15.0
@@ -30,7 +30,7 @@ from teradatasql import OperationalError
30
30
  from teradataml.common.wrapper_utils import AnalyticsWrapperUtils
31
31
  from teradataml.utils.utils import execute_sql
32
32
  from teradataml.utils.validators import _Validators
33
- from teradatasqlalchemy.telemetry.queryband import collect_queryband
33
+ from teradataml.telemetry_utils.queryband import collect_queryband
34
34
 
35
35
 
36
36
  @collect_queryband(queryband="CpToSql")
@@ -27,7 +27,7 @@ from teradataml.dataframe.copy_to import copy_to_sql, _create_table_object, \
27
27
  from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils
28
28
  from teradataml.dbutils.dbutils import _create_table, _execute_query_and_generate_pandas_df
29
29
  from teradataml.utils.validators import _Validators
30
- from teradatasqlalchemy.telemetry.queryband import collect_queryband
30
+ from teradataml.telemetry_utils.queryband import collect_queryband
31
31
 
32
32
 
33
33
  @collect_queryband(queryband="fstExprt")
@@ -1958,7 +1958,8 @@ class _DataTransferUtils():
1958
1958
  dt_obj = _DataTransferUtils(df)
1959
1959
  ins_query = dt_obj._table_exists()
1960
1960
  """
1961
- return con.dialect.has_table(get_connection(), self.table_name, self.schema_name)
1961
+ return con.dialect.has_table(get_connection(), self.table_name, self.schema_name,
1962
+ table_only=True)
1962
1963
 
1963
1964
  def _get_fully_qualified_table_name(self, table_name=None, schema_name=None):
1964
1965
  """
@@ -2144,7 +2145,8 @@ class _DataTransferUtils():
2144
2145
  # drop the tables created by FastloadCSV.
2145
2146
  if not self.save_errors:
2146
2147
  for table in fastloadcsv_err_tables:
2147
- if conn.dialect.has_table(conn, table_name=table, schema=self.schema_name):
2148
+ if conn.dialect.has_table(conn, table_name=table, schema=self.schema_name,
2149
+ table_only=True):
2148
2150
  UtilFuncs._drop_table(self._get_fully_qualified_table_name(table))
2149
2151
  err_warn_dict.update({"fastloadcsv_error_tables": []})
2150
2152
  return err_warn_dict