teradataml 20.0.0.1__py3-none-any.whl → 20.0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic. Click here for more details.

Files changed (200)
  1. teradataml/LICENSE.pdf +0 -0
  2. teradataml/README.md +112 -0
  3. teradataml/__init__.py +6 -3
  4. teradataml/_version.py +1 -1
  5. teradataml/analytics/__init__.py +3 -2
  6. teradataml/analytics/analytic_function_executor.py +224 -16
  7. teradataml/analytics/analytic_query_generator.py +92 -0
  8. teradataml/analytics/byom/__init__.py +3 -2
  9. teradataml/analytics/json_parser/metadata.py +1 -0
  10. teradataml/analytics/json_parser/utils.py +6 -4
  11. teradataml/analytics/meta_class.py +40 -1
  12. teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
  13. teradataml/analytics/sqle/__init__.py +10 -2
  14. teradataml/analytics/table_operator/__init__.py +3 -2
  15. teradataml/analytics/uaf/__init__.py +21 -2
  16. teradataml/analytics/utils.py +62 -1
  17. teradataml/analytics/valib.py +1 -1
  18. teradataml/automl/__init__.py +1502 -323
  19. teradataml/automl/custom_json_utils.py +139 -61
  20. teradataml/automl/data_preparation.py +245 -306
  21. teradataml/automl/data_transformation.py +32 -12
  22. teradataml/automl/feature_engineering.py +313 -82
  23. teradataml/automl/model_evaluation.py +44 -35
  24. teradataml/automl/model_training.py +109 -146
  25. teradataml/catalog/byom.py +8 -8
  26. teradataml/clients/pkce_client.py +1 -1
  27. teradataml/common/constants.py +37 -0
  28. teradataml/common/deprecations.py +13 -7
  29. teradataml/common/garbagecollector.py +151 -120
  30. teradataml/common/messagecodes.py +4 -1
  31. teradataml/common/messages.py +2 -1
  32. teradataml/common/sqlbundle.py +1 -1
  33. teradataml/common/utils.py +97 -11
  34. teradataml/common/wrapper_utils.py +1 -1
  35. teradataml/context/context.py +72 -2
  36. teradataml/data/complaints_test_tokenized.csv +353 -0
  37. teradataml/data/complaints_tokens_model.csv +348 -0
  38. teradataml/data/covid_confirm_sd.csv +83 -0
  39. teradataml/data/dataframe_example.json +10 -0
  40. teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
  41. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
  42. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
  43. teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
  44. teradataml/data/docs/sqle/docs_17_20/Shap.py +197 -0
  45. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
  46. teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
  47. teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
  48. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
  49. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
  50. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
  51. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
  52. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
  53. teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
  54. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
  55. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
  56. teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
  57. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
  58. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
  59. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
  60. teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
  61. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
  62. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
  63. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
  64. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
  65. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
  66. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
  67. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
  68. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
  69. teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
  70. teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
  71. teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
  72. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  73. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
  74. teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
  75. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
  76. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
  77. teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
  78. teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
  79. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  80. teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
  81. teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
  82. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
  83. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  84. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
  85. teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
  86. teradataml/data/dwt2d_dataTable.csv +65 -0
  87. teradataml/data/dwt_dataTable.csv +8 -0
  88. teradataml/data/dwt_filterTable.csv +3 -0
  89. teradataml/data/finance_data4.csv +13 -0
  90. teradataml/data/grocery_transaction.csv +19 -0
  91. teradataml/data/idwt2d_dataTable.csv +5 -0
  92. teradataml/data/idwt_dataTable.csv +8 -0
  93. teradataml/data/idwt_filterTable.csv +3 -0
  94. teradataml/data/interval_data.csv +5 -0
  95. teradataml/data/jsons/paired_functions.json +14 -0
  96. teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
  97. teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
  98. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
  99. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
  100. teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
  101. teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
  102. teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
  103. teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
  104. teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
  105. teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
  106. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
  107. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
  108. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
  109. teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
  110. teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
  111. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
  112. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
  113. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
  114. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
  115. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
  116. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
  117. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
  118. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
  119. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
  120. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
  121. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
  122. teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
  123. teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
  124. teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
  125. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
  126. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
  127. teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
  128. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
  129. teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
  130. teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
  131. teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
  132. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
  133. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
  134. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
  135. teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
  136. teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
  137. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
  138. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
  139. teradataml/data/jsons/uaf/17.20/TD_SAX.json +208 -0
  140. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
  141. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
  142. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
  143. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
  144. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
  145. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +400 -0
  146. teradataml/data/load_example_data.py +8 -2
  147. teradataml/data/naivebayestextclassifier_example.json +1 -1
  148. teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
  149. teradataml/data/peppers.png +0 -0
  150. teradataml/data/real_values.csv +14 -0
  151. teradataml/data/sax_example.json +8 -0
  152. teradataml/data/scripts/deploy_script.py +1 -1
  153. teradataml/data/scripts/sklearn/sklearn_fit.py +17 -10
  154. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +2 -2
  155. teradataml/data/scripts/sklearn/sklearn_function.template +30 -7
  156. teradataml/data/scripts/sklearn/sklearn_neighbors.py +1 -1
  157. teradataml/data/scripts/sklearn/sklearn_score.py +12 -3
  158. teradataml/data/scripts/sklearn/sklearn_transform.py +55 -4
  159. teradataml/data/star_pivot.csv +8 -0
  160. teradataml/data/templates/open_source_ml.json +2 -1
  161. teradataml/data/teradataml_example.json +20 -1
  162. teradataml/data/timestamp_data.csv +4 -0
  163. teradataml/data/titanic_dataset_unpivoted.csv +19 -0
  164. teradataml/data/uaf_example.json +55 -1
  165. teradataml/data/unpivot_example.json +15 -0
  166. teradataml/data/url_data.csv +9 -0
  167. teradataml/data/windowdfft.csv +16 -0
  168. teradataml/dataframe/copy_to.py +1 -1
  169. teradataml/dataframe/data_transfer.py +5 -3
  170. teradataml/dataframe/dataframe.py +474 -41
  171. teradataml/dataframe/fastload.py +3 -3
  172. teradataml/dataframe/functions.py +339 -0
  173. teradataml/dataframe/row.py +160 -0
  174. teradataml/dataframe/setop.py +2 -2
  175. teradataml/dataframe/sql.py +658 -20
  176. teradataml/dataframe/window.py +1 -1
  177. teradataml/dbutils/dbutils.py +322 -16
  178. teradataml/geospatial/geodataframe.py +1 -1
  179. teradataml/geospatial/geodataframecolumn.py +1 -1
  180. teradataml/hyperparameter_tuner/optimizer.py +13 -13
  181. teradataml/lib/aed_0_1.dll +0 -0
  182. teradataml/opensource/sklearn/_sklearn_wrapper.py +154 -69
  183. teradataml/options/__init__.py +3 -1
  184. teradataml/options/configure.py +14 -2
  185. teradataml/options/display.py +2 -2
  186. teradataml/plot/axis.py +4 -4
  187. teradataml/scriptmgmt/UserEnv.py +10 -6
  188. teradataml/scriptmgmt/lls_utils.py +3 -2
  189. teradataml/table_operators/Script.py +2 -2
  190. teradataml/table_operators/TableOperator.py +106 -20
  191. teradataml/table_operators/table_operator_util.py +88 -41
  192. teradataml/table_operators/templates/dataframe_udf.template +63 -0
  193. teradataml/telemetry_utils/__init__.py +0 -0
  194. teradataml/telemetry_utils/queryband.py +52 -0
  195. teradataml/utils/validators.py +1 -1
  196. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/METADATA +115 -2
  197. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/RECORD +200 -140
  198. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/WHEEL +0 -0
  199. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/top_level.txt +0 -0
  200. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/zip-safe +0 -0
@@ -15,6 +15,7 @@
15
15
 
16
16
  # Python libraries
17
17
  import time
18
+ import ast
18
19
 
19
20
  # Teradata libraries
20
21
  from teradataml.dataframe.dataframe import DataFrame
@@ -56,7 +57,12 @@ class _ModelEvaluator:
56
57
  self.target_column = target_column
57
58
  self.task_type = task_type
58
59
 
59
- def model_evaluation(self, rank, table_name_mapping, test_data_ind = False, target_column_ind = False):
60
+ def model_evaluation(self,
61
+ rank,
62
+ table_name_mapping,
63
+ data_node_id,
64
+ target_column_ind = True,
65
+ get_metrics = False):
60
66
  """
61
67
  DESCRIPTION:
62
68
  Function performs the model evaluation on the specified rank in leaderboard.
@@ -72,25 +78,32 @@ class _ModelEvaluator:
72
78
  Specifies the mapping of train,test table names.
73
79
  Types: dict
74
80
 
75
- test_data_ind:
76
- Optional Argument.
77
- Specifies whether test data is present or not.
78
- Default Value: False
79
- Types: bool
81
+ data_node_id:
82
+ Required Argument.
83
+ Specifies the test data node id.
84
+ Types: str
80
85
 
81
86
  target_column_ind:
82
87
  Optional Argument.
83
88
  Specifies whether target column is present in the dataset or not.
89
+ Default Value: True
90
+ Types: bool
91
+
92
+ get_metrics:
93
+ Optional Argument.
94
+ Specifies whether to return metrics or not.
84
95
  Default Value: False
96
+ Types: bool
85
97
 
86
98
  RETURNS:
87
99
  tuple containing, performance metrics and predictions of specified rank ML model.
88
100
 
89
101
  """
90
- # Setting test data indicator and target column indicator
91
- self.test_data_ind = test_data_ind
102
+ # Setting target column indicator
92
103
  self.target_column_ind = target_column_ind
93
104
  self.table_name_mapping = table_name_mapping
105
+ self.data_node_id = data_node_id
106
+ self.get_metrics = get_metrics
94
107
 
95
108
  # Return predictions only if test data is present and target column is not present
96
109
  return self._evaluator(rank)
@@ -114,38 +127,34 @@ class _ModelEvaluator:
114
127
  """
115
128
  # Extracting model using rank
116
129
  model = self.model_info.loc[rank]
130
+
131
+ ml_name = self.model_info.loc[rank]['MODEL_ID'].split('_')[0]
117
132
 
118
133
  # Defining eval_params
119
- eval_params = _ModelTraining._eval_params_generation(model['Name'],
134
+ eval_params = _ModelTraining._eval_params_generation(ml_name,
120
135
  self.target_column,
121
136
  self.task_type)
122
137
 
123
- # Test Data
124
- test = DataFrame(self.table_name_mapping['{}_test'.format(model['Feature-Selection'])])
125
-
126
- # Getting test data from table
127
- if not self.test_data_ind:
128
- # Test Data
129
- test = DataFrame(self.table_name_mapping['{}_test'.format(model['Feature-Selection'])])
130
- else:
131
- test = DataFrame(self.table_name_mapping['{}_new_test'.format(model['Feature-Selection'])])
132
-
133
- print("\nFollowing model is being used for generating prediction :")
134
- print("Model ID :", model['Model-ID'],
135
- "\nFeature Selection Method :",model['Feature-Selection'])
138
+ # Extracting test data for evaluation based on data node id
139
+ test = DataFrame(self.table_name_mapping[self.data_node_id]['{}_new_test'.format(model['FEATURE_SELECTION'])])
136
140
 
137
- # Evaluation and predictions
138
- if model['Name'] == 'knn':
139
- metrics = model['model-obj'].evaluate(test_data=test)
140
- pred = model['model-obj'].predict(test_data=test)
141
+ print("\nFollowing model is being picked for evaluation:")
142
+ print("Model ID :", model['MODEL_ID'],
143
+ "\nFeature Selection Method :",model['FEATURE_SELECTION'])
144
+
145
+ if self.task_type.lower() == 'classification':
146
+ params = ast.literal_eval(model['PARAMETERS'])
147
+ eval_params['output_responses'] = params['output_responses']
148
+
149
+ # Mapping data according to model type
150
+ data_map = 'test_data' if ml_name == 'KNN' else 'newdata'
151
+ # Performing evaluation if get_metrics is True else returning predictions
152
+ if self.get_metrics:
153
+ metrics = model['model-obj'].evaluate(**{data_map: test}, **eval_params)
154
+ return metrics
141
155
  else:
142
- # Return predictions only if test data is present and target column is not present
143
- if self.test_data_ind and not self.target_column_ind:
156
+ # Removing accumulate parameter if target column is not present
157
+ if not self.target_column_ind:
144
158
  eval_params.pop("accumulate")
145
- pred = model['model-obj'].predict(newdata=test, **eval_params)
146
- return pred
147
- # Return both metrics and predictions for all other cases
148
- metrics = model['model-obj'].evaluate(newdata=test, **eval_params)
149
- pred = model['model-obj'].predict(newdata=test, **eval_params)
150
-
151
- return (metrics, pred)
159
+ pred = model['model-obj'].predict(**{data_map: test}, **eval_params)
160
+ return pred
@@ -19,6 +19,7 @@ from concurrent.futures import ThreadPoolExecutor
19
19
  import math
20
20
  import pandas as pd
21
21
  from itertools import product
22
+ import numpy as np
22
23
 
23
24
  # Teradata libraries
24
25
  from teradataml.context import context as tdmlctx
@@ -38,7 +39,8 @@ class _ModelTraining:
38
39
  verbose=0,
39
40
  features=None,
40
41
  task_type="Regression",
41
- custom_data = None):
42
+ custom_data = None,
43
+ **kwargs):
42
44
  """
43
45
  DESCRIPTION:
44
46
  Function initializes the data, target column, features and models
@@ -89,6 +91,28 @@ class _ModelTraining:
89
91
  Optional Argument.
90
92
  Specifies json object containing user customized input.
91
93
  Types: json object
94
+
95
+ **kwargs:
96
+ Specifies the additional arguments for model training. Below
97
+ are the additional arguments:
98
+ volatile:
99
+ Optional Argument.
100
+ Specifies whether to put the interim results of the
101
+ functions in a volatile table or not. When set to
102
+ True, results are stored in a volatile table,
103
+ otherwise not.
104
+ Default Value: False
105
+ Types: bool
106
+
107
+ persist:
108
+ Optional Argument.
109
+ Specifies whether to persist the interim results of the
110
+ functions in a table or not. When set to True,
111
+ results are persisted in a table; otherwise,
112
+ results are garbage collected at the end of the
113
+ session.
114
+ Default Value: False
115
+ Types: bool
92
116
  """
93
117
  self.data = data
94
118
  self.target_column = target_column
@@ -99,6 +123,8 @@ class _ModelTraining:
99
123
  self.custom_data = custom_data
100
124
  self.labels = self.data.drop_duplicate(self.target_column).size
101
125
  self.startify_col = None
126
+ self.persist = kwargs.get("persist", False)
127
+ self.volatile = kwargs.get("volatile", False)
102
128
 
103
129
  def model_training(self,
104
130
  auto=True,
@@ -278,20 +304,25 @@ class _ModelTraining:
278
304
  """
279
305
  # Creating a copy to avoid use of same reference of memory
280
306
  if self.task_type != "Regression":
281
- sorted_model_df = trained_models_info.sort_values(by=['Micro-F1', 'Weighted-F1'],
282
- ascending=[False, False]).reset_index(drop=True)
307
+ sorted_model_df = trained_models_info.sort_values(by=['MICRO-F1', 'WEIGHTED-F1'],
308
+ ascending=[False, False]).reset_index(drop=True)
283
309
  else:
284
- sorted_model_df = trained_models_info.sort_values(by='R2-score',
285
- ascending=False).reset_index(drop=True)
310
+ sorted_model_df = trained_models_info.sort_values(by='R2',
311
+ ascending=False).reset_index(drop=True)
312
+
286
313
 
287
314
  # Adding rank to leaderboard
288
- sorted_model_df.insert(0, 'Rank', sorted_model_df.index + 1)
315
+ sorted_model_df.insert(0, 'RANK', sorted_model_df.index + 1)
316
+
317
+ # Internal Data list for leaderboard
318
+ dp_lst = ["model-obj", "DATA_TABLE", "RESULT_TABLE", "PARAMETERS"]
289
319
 
290
320
  # Excluding the model object and model name from leaderboard
291
- leaderboard = sorted_model_df.drop(["model-obj","Name"], axis=1)
321
+ leaderboard = sorted_model_df.drop(dp_lst, axis=1)
322
+
292
323
  # filtering the rows based on the max_models
293
324
  if self.max_models is not None:
294
- leaderboard = leaderboard[leaderboard["Rank"] <= self.max_models]
325
+ leaderboard = leaderboard[leaderboard["RANK"] <= self.max_models]
295
326
 
296
327
  self._display_msg(msg="Leaderboard",
297
328
  progress_bar=self.progress_bar,
@@ -436,28 +467,24 @@ class _ModelTraining:
436
467
  max_depth.extend([6, 7, 8])
437
468
  min_node_size.extend([2])
438
469
  iter_num.extend([20])
439
- num_trees.extend([10, 20])
440
470
  elif num_rows < 10000 and num_cols < 15:
441
471
  min_impurity.extend([0.1, 0.2])
442
472
  shrinkage_factor.extend([0.1, 0.3])
443
473
  max_depth.extend([6, 8, 10])
444
474
  min_node_size.extend([2, 3])
445
475
  iter_num.extend([20, 30])
446
- num_trees.extend([20, 30])
447
476
  elif num_rows < 100000 and num_cols < 20:
448
477
  min_impurity.extend([0.2, 0.3])
449
478
  shrinkage_factor.extend([0.01, 0.1, 0.2])
450
479
  max_depth.extend([4, 6, 7])
451
480
  min_node_size.extend([3, 4])
452
481
  iter_num.extend([30, 40])
453
- num_trees.extend([30, 40])
454
482
  else:
455
483
  min_impurity.extend([0.1, 0.2, 0.3])
456
484
  shrinkage_factor.extend([0.01, 0.05, 0.1])
457
485
  max_depth.extend([3, 4, 7, 8])
458
486
  min_node_size.extend([2, 3, 4])
459
487
  iter_num.extend([20, 30, 40])
460
- num_trees.extend([20, 30, 40])
461
488
 
462
489
  # Hyperparameters for XGBoost model
463
490
  xgb_params = {
@@ -736,12 +763,15 @@ class _ModelTraining:
736
763
 
737
764
  # Hyperparameters for each model
738
765
  model_params = parameters[:min(len(parameters), 5)]
739
- self._display_msg(msg="\nPerforming hyperParameter tuning ...", progress_bar=self.progress_bar)
766
+ self._display_msg(msg="\nPerforming hyperparameter tuning ...", progress_bar=self.progress_bar)
740
767
 
741
- # Defining training and testing data
768
+ # Defining training data
742
769
  data_types = ['lasso', 'rfe', 'pca']
743
770
  trainng_datas = tuple(DataFrame(self.table_name_mapping[f'{data_type}_train']) for data_type in data_types)
744
- testing_datas = tuple(DataFrame(self.table_name_mapping[f'{data_type}_test']) for data_type in data_types)
771
+
772
+ if self.task_type == "Classification":
773
+ response_values = trainng_datas[0].get(self.target_column).drop_duplicate().get_values().flatten().tolist()
774
+ self.output_response = [str(i) for i in response_values]
745
775
 
746
776
  if self.stopping_metric is None:
747
777
  self.stopping_tolerance, self.stopping_metric = 1.0, 'MICRO-F1' \
@@ -765,115 +795,15 @@ class _ModelTraining:
765
795
 
766
796
  trained_models = []
767
797
  for param in model_params:
768
- result = self._hyperparameter_tunning(param, trainng_datas, testing_datas)
798
+ result = self._hyperparameter_tunning(param, trainng_datas)
769
799
  trained_models.append(result)
770
800
 
771
801
  models_df = pd.concat(trained_models, ignore_index=True)
772
-
773
- # Score the model and combine the results into a single DataFrame
774
- trained_models_info = self._model_scoring(testing_datas, models_df)
775
- trained_models_info = trained_models_info.reset_index(drop=True)
776
-
777
- return trained_models_info
778
-
779
- def _model_scoring(self,
780
- test_data,
781
- model_info):
782
- """
783
- DESCRIPTION:
784
- Internal function generates the performance metrics for
785
- trained ML models using testing dataset.
786
-
787
- PARAMETERS:
788
- test_data
789
- Required Argument.
790
- Specifies the testing datasets
791
- Types: tuple of Teradataml DataFrame
792
-
793
- model_info
794
- Required Argument.
795
- Specifies the trained models information.
796
- Types: Pandas DataFrame
797
-
798
- RETURNS:
799
- Pandas DataFrame containing, trained models with their performance metrics.
800
- """
801
- self._display_msg(msg="Evaluating models performance ...",
802
- progress_bar = self.progress_bar,
803
- show_data=True)
804
- # Empty list for storing model performance metrics
805
- model_performance_data = []
806
-
807
- # Mapping feature selection methods to corresponding test data
808
- feature_selection_to_test_data = {"lasso": test_data[0],
809
- "rfe": test_data[1],
810
- "pca": test_data[2]}
811
-
812
- # Iterating over models
813
- for index, model_row in model_info.iterrows():
814
- # Extracting model name, model id, feature selection method, and model object
815
- model_name, model_id, feature_selection, model_object = model_row['Name'], \
816
- model_row['Model-ID'], model_row['Feature-Selection'], model_row['obj']
817
-
818
- # Selecting test data based on feature selection method
819
- test_set = feature_selection_to_test_data[feature_selection]
820
-
821
- # Model evaluation
822
- if model_name == 'knn':
823
- performance_metrics = model_object.evaluate(test_data=test_set)
824
- else:
825
- eval_params = _ModelTraining._eval_params_generation(model_name,
826
- self.target_column,
827
- self.task_type)
828
- performance_metrics = model_object.evaluate(newdata=test_set, **eval_params)
829
-
830
- # Extracting performance metrics
831
- if self.is_classification_type():
832
- # Classification
833
- # Extract performance metrics from the output data
834
- performance_metrics_list = [metric[2] for metric in performance_metrics.output_data.itertuples()]
835
-
836
- # Combine all the elements to form a new row
837
- new_row = [model_name, model_id, feature_selection] + performance_metrics_list + [model_object]
838
- else:
839
- # Regression
840
- regression_metrics = next(performance_metrics.result.itertuples())
841
- sample_size = test_set.select('id').size
842
- feature_count = len(test_set.columns) - 2
843
- r2_score = regression_metrics[8]
844
- adjusted_r2_score = 1 - ((1 - r2_score) * (sample_size - 1) / (sample_size - feature_count - 1))
845
- new_row = [model_name, model_id, feature_selection, regression_metrics[0],
846
- regression_metrics[1], regression_metrics[2], regression_metrics[5],
847
- regression_metrics[6], r2_score, adjusted_r2_score, model_object]
848
-
849
- model_performance_data.append(new_row)
850
-
851
- if self.is_classification_type():
852
- model_metrics_df = pd.DataFrame(model_performance_data, columns=['Name','Model-ID',
853
- 'Feature-Selection','Accuracy','Micro-Precision',
854
- 'Micro-Recall','Micro-F1',
855
- 'Macro-Precision','Macro-Recall',
856
- 'Macro-F1','Weighted-Precision',
857
- 'Weighted-Recall','Weighted-F1',
858
- 'model-obj'])
859
- else:
860
- model_metrics_df = pd.DataFrame(model_performance_data, columns=['Name', 'Model-ID',
861
- 'Feature-Selection',
862
- 'MAE', 'MSE', 'MSLE',
863
- 'RMSE', 'RMSLE',
864
- 'R2-score',
865
- 'Adjusted R2-score',
866
- 'model-obj'])
867
- self._display_msg(msg="Evaluation completed.",
868
- progress_bar = self.progress_bar,
869
- show_data=True)
870
-
871
- return model_metrics_df
872
-
802
+ return models_df
803
+
873
804
  def _hyperparameter_tunning(self,
874
805
  model_param,
875
- train_data,
876
- test_data):
806
+ train_data):
877
807
  """
878
808
  DESCRIPTION:
879
809
  Internal function performs hyperparameter tuning on
@@ -890,11 +820,6 @@ class _ModelTraining:
890
820
  Specifies the training datasets.
891
821
  Types: tuple of Teradataml DataFrame
892
822
 
893
- test_data
894
- Required Argument.
895
- Specifies the testing datasets
896
- Types: tuple of Teradataml DataFrame
897
-
898
823
  RETURNS:
899
824
  pandas DataFrame containing, trained models information.
900
825
  """
@@ -910,13 +835,21 @@ class _ModelTraining:
910
835
  # Input columns for model
911
836
  model_param['input_columns'] = self.features
912
837
 
838
+ # Setting persist for model
839
+ model_param['persist'] = self.persist
840
+
913
841
  self._display_msg(msg=model_param['name'],
914
842
  progress_bar=self.progress_bar,
915
843
  show_data=True)
916
844
 
917
- # Defining test data for KNN
845
+ # Since the entire dataset is used for HPT training,
846
+ # the prepared training data is passed as test_data for KNN.
918
847
  if model_param['name'] == 'knn':
919
- model_param['test_data'] = test_data
848
+ model_param['test_data'] = train_data
849
+
850
+ if self.task_type == "Classification":
851
+ model_param['output_prob'] = True
852
+ model_param['output_responses'] = self.output_response
920
853
 
921
854
  # Using RandomSearch for hyperparameter tuning when max_models is given.
922
855
  # Otherwise, using GridSearch for hyperparameter tuning.
@@ -951,20 +884,35 @@ class _ModelTraining:
951
884
  sample_id_column='id',stratify_column=self.startify_col, verbose=verbose, max_time=self.max_runtime_secs)
952
885
 
953
886
  # Getting all passed models
954
- _df = _obj.model_stats.merge(_obj.models[_obj.models['STATUS']=='PASS'][['MODEL_ID', 'DATA_ID']], on='MODEL_ID', how='inner')
887
+ model_info = _obj.model_stats.merge(_obj.models[_obj.models['STATUS']=='PASS'][['MODEL_ID', 'DATA_ID', 'PARAMETERS']],
888
+ on='MODEL_ID', how='inner')
955
889
  # Creating mapping data ID to feature selection method
956
- data_id_to_method_map = {"DF_0": "lasso", "DF_1": "rfe", "DF_2": "pca"}
957
-
958
- # Mapping data ID to feature selection method
959
- _df['Feature-Selection'] = _df['DATA_ID'].map(data_id_to_method_map)
960
- # Getting model details
961
- _df['Name'] = model_param['name']
962
- _df['Model-ID'] = _df['MODEL_ID']
963
- _df['obj'] = _df['MODEL_ID'].apply(lambda x: _obj.get_model(x))
964
-
965
- # Extracting needed columns
966
- model_info = _df[["Name", "Model-ID", "Feature-Selection", "obj"]]
967
-
890
+ data_id_to_table_map = {"DF_0": ('lasso', train_data[0]._table_name),
891
+ "DF_1": ('rfe', train_data[1]._table_name),
892
+ "DF_2": ('pca', train_data[2]._table_name)}
893
+
894
+ # Updating model stats with feature selection method and result table
895
+ for index, row in model_info.iterrows():
896
+ model_info.loc[index, 'FEATURE_SELECTION'] = data_id_to_table_map[row['DATA_ID']][0]
897
+ model_info.loc[index, 'DATA_TABLE'] = data_id_to_table_map[row['DATA_ID']][1]
898
+ model_info.loc[index, 'RESULT_TABLE'] = _obj.get_model(row['MODEL_ID']).result._table_name
899
+ model_info.loc[index, 'model-obj'] = _obj.get_model(row['MODEL_ID'])
900
+
901
+ # Dropping column 'DATA_ID'
902
+ model_info.drop(['DATA_ID'], axis=1, inplace=True)
903
+
904
+ model_info.insert(1, 'FEATURE_SELECTION', model_info.pop('FEATURE_SELECTION'))
905
+
906
+ if not self.is_classification_type():
907
+ # Calculating Adjusted-R2 for regression
908
+ # Getting size and feature count for each feature selection method
909
+ methods = ["lasso", "rfe", "pca"]
910
+ size_map = {method : df.select('id').size for method, df in zip(methods, train_data)}
911
+ feature_count_map = {method : len(df.columns) - 2 for method, df in zip(methods, train_data)}
912
+ model_info['ADJUSTED_R2'] = model_info.apply(lambda row:
913
+ 1 - ((1 - row['R2']) * (size_map[row['FEATURE_SELECTION']] - 1) /
914
+ (size_map[row['FEATURE_SELECTION']] - feature_count_map[row['FEATURE_SELECTION']] - 1)), axis=1)
915
+
968
916
  self._display_msg(msg="-"*100,
969
917
  progress_bar=self.progress_bar,
970
918
  show_data=True)
@@ -1006,21 +954,36 @@ class _ModelTraining:
1006
954
  # Setting the eval_params
1007
955
  eval_params = {"id_column": "id",
1008
956
  "accumulate": target_column}
957
+
958
+ model_type = {
959
+ 'xgboost': 'model_type',
960
+ 'glm': 'model_type',
961
+ 'decisionforest': 'tree_type',
962
+ 'svm': 'model_type',
963
+ 'knn': 'model_type'
964
+ }
965
+
966
+ ml_name = ml_name.replace('_', '').lower()
1009
967
 
1010
968
  # For Classification
1011
969
  if task_type.lower() != "regression":
970
+ eval_params[model_type[ml_name]] = 'Classification'
971
+ eval_params['output_prob'] = True
972
+
1012
973
  if ml_name == 'xgboost':
1013
- eval_params['model_type'] = 'Classification'
1014
974
  eval_params['object_order_column'] = ['task_index', 'tree_num', 'iter','class_num', 'tree_order']
1015
- else:
1016
- if ml_name == 'glm':
1017
- eval_params['family'] = 'BINOMIAL'
1018
-
1019
- eval_params['output_prob'] = True
975
+
976
+ elif ml_name == 'glm':
977
+ eval_params['family'] = 'BINOMIAL'
978
+
1020
979
  else:
1021
980
  # For Regression
981
+ eval_params[model_type[ml_name]] = 'Regression'
982
+
1022
983
  if ml_name == 'xgboost':
1023
- eval_params['model_type'] = 'Regression'
1024
984
  eval_params['object_order_column'] = ['task_index', 'tree_num', 'iter', 'tree_order']
985
+
986
+ elif ml_name == 'glm':
987
+ eval_params['family'] = 'GAUSSIAN'
1025
988
 
1026
989
  return eval_params
@@ -26,7 +26,7 @@ from teradataml.options.display import display
26
26
  from teradataml.common.constants import ModelCatalogingConstants as mac
27
27
  from teradataml.options.configure import configure
28
28
  from teradataml.utils.utils import execute_sql
29
- from teradatasqlalchemy.telemetry.queryband import collect_queryband
29
+ from teradataml.telemetry_utils.queryband import collect_queryband
30
30
 
31
31
  validator = _Validators()
32
32
 
@@ -541,13 +541,12 @@ def save_byom(model_id,
541
541
  # If exists, extract required information about table columns types
542
542
  # else extract from additional_columns_types.
543
543
  # Also validate model_id against allowed length.
544
- table_exists = connection.dialect.has_table(connection, table_name=table_name, schema=schema_name)
544
+ table_exists = connection.dialect.has_table(connection, table_name=table_name,
545
+ schema=schema_name, table_only=True)
545
546
  if table_exists:
546
547
  # Check if model exists or not. If exists, raise error.
547
548
  __check_if_model_exists(
548
549
  model_id, table_name, schema_name, raise_error_if_model_found=True)
549
- if len(additional_columns_types) != 0:
550
- warnings.warn("Argument additional_columns_types is ignored since table already exists.", stacklevel=2)
551
550
 
552
551
  # Gather column name and type information from existing table
553
552
  existing_table_df = DataFrame(in_schema(schema_name, table_name))
@@ -807,7 +806,7 @@ def delete_byom(model_id, table_name=None, schema_name=None):
807
806
 
808
807
  # Before proceeding further, check whether table exists or not.
809
808
  conn = get_connection()
810
- if not conn.dialect.has_table(conn, table_name=table_name, schema=schema_name):
809
+ if not conn.dialect.has_table(conn, table_name=table_name, schema=schema_name, table_only=True):
811
810
  error_code = MessageCodes.MODEL_CATALOGING_OPERATION_FAILED
812
811
  error_msg = Messages.get_message(
813
812
  error_code, "delete", 'Table "{}.{}" does not exist.'.format(schema_name, table_name))
@@ -1472,7 +1471,7 @@ def retrieve_byom(model_id,
1472
1471
 
1473
1472
  # Before proceeding further, check whether table exists or not.
1474
1473
  conn = get_connection()
1475
- if not conn.dialect.has_table(conn, table_name=table_name, schema=schema_name):
1474
+ if not conn.dialect.has_table(conn, table_name=table_name, schema=schema_name, table_only=True):
1476
1475
  error_code = MessageCodes.MODEL_CATALOGING_OPERATION_FAILED
1477
1476
  error_msg = Messages.get_message(
1478
1477
  error_code, "retrieve", 'Table "{}.{}" does not exist.'.format(schema_name, table_name))
@@ -1535,7 +1534,8 @@ def retrieve_byom(model_id,
1535
1534
  license_table = in_schema(license_schema_name, license_table_name)
1536
1535
 
1537
1536
  # Check whether license table exists or not before proceeding further.
1538
- if not conn.dialect.has_table(conn, table_name=license_table_name, schema=license_schema_name):
1537
+ if not conn.dialect.has_table(conn, table_name=license_table_name, schema=license_schema_name,
1538
+ table_only=True):
1539
1539
  error_code = MessageCodes.EXECUTION_FAILED
1540
1540
  error_msg = Messages.get_message(
1541
1541
  error_code, "retrieve the model", 'Table "{}" does not exist.'.format(license_table))
@@ -1723,7 +1723,7 @@ def list_byom(table_name=None, schema_name=None, model_id=None):
1723
1723
 
1724
1724
  # Before proceeding further, check whether table exists or not.
1725
1725
  conn = get_connection()
1726
- if not conn.dialect.has_table(conn, table_name=table_name, schema=schema_name):
1726
+ if not conn.dialect.has_table(conn, table_name=table_name, schema=schema_name, table_only=True):
1727
1727
  error_code = MessageCodes.MODEL_CATALOGING_OPERATION_FAILED
1728
1728
  error_msg = Messages.get_message(
1729
1729
  error_code, "list", 'Table "{}.{}" does not exist.'.format(schema_name, table_name))
@@ -425,7 +425,7 @@ class _DAWorkflow:
425
425
  """
426
426
  device_cfg = requests.post(
427
427
  url=self.device_auth_end_point,
428
- data={'client_id': self.__client_id})
428
+ data={'client_id': self.__client_id, 'scope': 'openid'})
429
429
 
430
430
  # Check the status. If response is not 200, raise error.
431
431
  _Validators._validate_http_response(device_cfg, 200, "get the device metadata")
@@ -425,6 +425,8 @@ class TableOperatorConstants(Enum):
425
425
  MAP_TEMPLATE = "dataframe_map.template"
426
426
  # Template of the intermediate script that will be generated.
427
427
  APPLY_TEMPLATE = "dataframe_apply.template"
428
+ # Template of the intermediate script that will be generated for UDF.
429
+ UDF_TEMPLATE = "dataframe_udf.template"
428
430
  # In-DB execution mode.
429
431
  INDB_EXEC = "IN-DB"
430
432
  # Local execution mode.
@@ -439,6 +441,8 @@ class TableOperatorConstants(Enum):
439
441
  MAP_PARTITION_OP = "map_partition"
440
442
  # apply operation.
441
443
  APPLY_OP = "apply"
444
+ # udf operation.
445
+ UDF_OP = "udf"
442
446
  # Template of the script_executor that will be used to generate the temporary script_executor file.
443
447
  SCRIPT_TEMPLATE = "script_executor.template"
444
448
  # Log Type.
@@ -1369,6 +1373,7 @@ class TeradataAnalyticFunctionTypes(Enum):
1369
1373
  UAF = "UAF"
1370
1374
  TABLEOPERATOR = "TABLE_OPERATOR"
1371
1375
  BYOM = "BYOM"
1376
+ STORED_PROCEDURE = "STORED_PROCEDURE"
1372
1377
 
1373
1378
 
1374
1379
  class TeradataAnalyticFunctionInfo(Enum):
@@ -1379,6 +1384,8 @@ class TeradataAnalyticFunctionInfo(Enum):
1379
1384
  TABLE_OPERATOR = {"func_type": "tableoperator", "lowest_version": "17.00 ",
1380
1385
  "display_function_type_name" :"TABLE OPERATOR"}
1381
1386
  BYOM = {"func_type": "byom", "lowest_version": None, "display_function_type_name": "BYOM"}
1387
+ STORED_PROCEDURE = {"func_type": "storedprocedure", "lowest_version": "17.20", "display_function_type_name": "UAF",
1388
+ "metadata_class" : "_AnlyFuncMetadataUAF"}
1382
1389
 
1383
1390
  class TeradataUAFSpecificArgs(Enum):
1384
1391
  INPUT_MODE = "input_mode"
@@ -1436,3 +1443,33 @@ class CloudProvider(Enum):
1436
1443
  # and '2018-03-28', using the latest one.
1437
1444
  X_MS_VERSION = "2019-12-12"
1438
1445
  X_MS_BLOB_TYPE = "BlockBlob"
1446
+
1447
+ class SessionParamsSQL:
1448
+ # Holds the SQL Statements for Session params.
1449
+ TIMEZONE = "SET TIME ZONE {}"
1450
+ ACCOUNT = "SET SESSION ACCOUNT = '{}' FOR {}"
1451
+ CALENDAR = "SET SESSION CALENDAR = {}"
1452
+ CHARACTER_SET_UNICODE = "SET SESSION CHARACTER SET UNICODE PASS THROUGH {}"
1453
+ COLLATION = "SET SESSION COLLATION {}"
1454
+ CONSTRAINT = "SET SESSION CONSTRAINT = {}"
1455
+ DATABASE = "SET SESSION DATABASE {}"
1456
+ DATEFORM = "SET SESSION DATEFORM = {}"
1457
+ DEBUG_FUNCTION = "SET SESSION DEBUG FUNCTION {} {}"
1458
+ DOT_NOTATION = "SET SESSION DOT NOTATION {} ON ERROR"
1459
+ ISOLATED_LOADING = "SET SESSION FOR {} ISOLATED LOADING"
1460
+ FUNCTION_TRACE = "SET SESSION FUNCTION TRACE USING {} FOR TABLE {}"
1461
+ JSON_IGNORE_ERRORS = "SET SESSION JSON IGNORE ERRORS {}"
1462
+ SEARCHUIFDBPATH = "SET SESSION SEARCHUIFDBPATH = {}"
1463
+ TRANSACTION_ISOLATION_LEVEL = "SET SESSION CHARACTERISTICS AS TRANSACTION ISOLATION LEVEL {}"
1464
+ QUERY_BAND = "SET QUERY_BAND = {} FOR {}"
1465
+ UDFSEARCHPATH = "SET SESSION UDFSEARCHPATH = {} FOR FUNCTION = {}"
1466
+
1467
+ class SessionParamsPythonNames:
1468
+ # Holds the Python names for Session params.
1469
+ TIMEZONE = "Session Time Zone"
1470
+ ACCOUNT = "Account Name"
1471
+ CALENDAR = "Calendar"
1472
+ COLLATION = "Collation"
1473
+ DATABASE = "Current DataBase"
1474
+ DATEFORM = 'Current DateForm'
1475
+