teradataml-20.0.0.6-py3-none-any.whl → teradataml-20.0.0.7-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic. Click here for more details.

Files changed (96)
  1. teradataml/README.md +210 -0
  2. teradataml/__init__.py +1 -1
  3. teradataml/_version.py +1 -1
  4. teradataml/analytics/analytic_function_executor.py +162 -76
  5. teradataml/analytics/byom/__init__.py +1 -1
  6. teradataml/analytics/json_parser/__init__.py +2 -0
  7. teradataml/analytics/json_parser/analytic_functions_argument.py +95 -2
  8. teradataml/analytics/json_parser/metadata.py +22 -4
  9. teradataml/analytics/sqle/DecisionTreePredict.py +3 -2
  10. teradataml/analytics/sqle/NaiveBayesPredict.py +3 -2
  11. teradataml/analytics/sqle/__init__.py +3 -0
  12. teradataml/analytics/utils.py +4 -1
  13. teradataml/automl/__init__.py +2369 -464
  14. teradataml/automl/autodataprep/__init__.py +15 -0
  15. teradataml/automl/custom_json_utils.py +184 -112
  16. teradataml/automl/data_preparation.py +113 -58
  17. teradataml/automl/data_transformation.py +154 -53
  18. teradataml/automl/feature_engineering.py +113 -53
  19. teradataml/automl/feature_exploration.py +548 -25
  20. teradataml/automl/model_evaluation.py +260 -32
  21. teradataml/automl/model_training.py +399 -206
  22. teradataml/clients/auth_client.py +2 -2
  23. teradataml/common/aed_utils.py +11 -2
  24. teradataml/common/bulk_exposed_utils.py +4 -2
  25. teradataml/common/constants.py +62 -2
  26. teradataml/common/garbagecollector.py +50 -21
  27. teradataml/common/messagecodes.py +47 -2
  28. teradataml/common/messages.py +19 -1
  29. teradataml/common/sqlbundle.py +23 -6
  30. teradataml/common/utils.py +116 -10
  31. teradataml/context/aed_context.py +16 -10
  32. teradataml/data/Employee.csv +5 -0
  33. teradataml/data/Employee_Address.csv +4 -0
  34. teradataml/data/Employee_roles.csv +5 -0
  35. teradataml/data/JulesBelvezeDummyData.csv +100 -0
  36. teradataml/data/byom_example.json +5 -0
  37. teradataml/data/creditcard_data.csv +284618 -0
  38. teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
  39. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +1 -1
  40. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +1 -1
  41. teradataml/data/docs/sqle/docs_17_20/TextParser.py +1 -1
  42. teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
  43. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +3 -7
  44. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +3 -7
  45. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +3 -7
  46. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +3 -7
  47. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +3 -7
  48. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +3 -7
  49. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +3 -7
  50. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +3 -7
  51. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +3 -7
  52. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +3 -7
  53. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +3 -7
  54. teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
  55. teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
  56. teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
  57. teradataml/data/load_example_data.py +29 -11
  58. teradataml/data/payment_fraud_dataset.csv +10001 -0
  59. teradataml/data/teradataml_example.json +67 -0
  60. teradataml/dataframe/copy_to.py +714 -54
  61. teradataml/dataframe/dataframe.py +1153 -33
  62. teradataml/dataframe/dataframe_utils.py +8 -3
  63. teradataml/dataframe/functions.py +168 -1
  64. teradataml/dataframe/setop.py +4 -1
  65. teradataml/dataframe/sql.py +141 -9
  66. teradataml/dbutils/dbutils.py +470 -35
  67. teradataml/dbutils/filemgr.py +1 -1
  68. teradataml/hyperparameter_tuner/optimizer.py +456 -142
  69. teradataml/lib/aed_0_1.dll +0 -0
  70. teradataml/lib/libaed_0_1.dylib +0 -0
  71. teradataml/lib/libaed_0_1.so +0 -0
  72. teradataml/lib/libaed_0_1_aarch64.so +0 -0
  73. teradataml/scriptmgmt/UserEnv.py +234 -34
  74. teradataml/scriptmgmt/lls_utils.py +43 -17
  75. teradataml/sdk/_json_parser.py +1 -1
  76. teradataml/sdk/api_client.py +9 -6
  77. teradataml/sdk/modelops/_client.py +3 -0
  78. teradataml/series/series.py +12 -7
  79. teradataml/store/feature_store/constants.py +601 -234
  80. teradataml/store/feature_store/feature_store.py +2886 -616
  81. teradataml/store/feature_store/mind_map.py +639 -0
  82. teradataml/store/feature_store/models.py +5831 -214
  83. teradataml/store/feature_store/utils.py +390 -0
  84. teradataml/table_operators/table_operator_util.py +1 -1
  85. teradataml/table_operators/templates/dataframe_register.template +6 -2
  86. teradataml/table_operators/templates/dataframe_udf.template +6 -2
  87. teradataml/utils/docstring.py +527 -0
  88. teradataml/utils/dtypes.py +93 -0
  89. teradataml/utils/internal_buffer.py +2 -2
  90. teradataml/utils/utils.py +41 -2
  91. teradataml/utils/validators.py +694 -17
  92. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/METADATA +213 -2
  93. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/RECORD +96 -81
  94. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/WHEEL +0 -0
  95. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/top_level.txt +0 -0
  96. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/zip-safe +0 -0
@@ -1,6 +1,6 @@
1
1
  # ##################################################################
2
2
  #
3
- # Copyright 2023 Teradata. All rights reserved.
3
+ # Copyright 2025 Teradata. All rights reserved.
4
4
  # TERADATA CONFIDENTIAL AND TRADE SECRET
5
5
  #
6
6
  # Primary Owner: Kesavaragavan B (kesavaragavan.b@Teradata.com)
@@ -87,7 +87,24 @@ class _BaseSearch:
87
87
  "SVM": "newdata", "XGBoost": "newdata",
88
88
  "NaiveBayesTextClassifierTrainer": "newdata",
89
89
  "DecisionTree": "data", "KMeans": "data",
90
- "LinReg": "data", "LogReg": "data", "PCA": "data"}
90
+ "LinReg": "data", "LogReg": "data", "PCA": "data",
91
+ "LinearRegression": "data", "Lasso": "data",
92
+ "Ridge": "data", "ARDRegression": "data",
93
+ "BayesianRidge": "data", "TweedieRegressor": "data",
94
+ "TheilSenRegressor": "data", "SGDRegressor": "data",
95
+ "RidgeCV": "data", "RANSACRegressor": "data",
96
+ "PoissonRegressor": "data", "PassiveAggressiveRegressor": "data",
97
+ "OrthogonalMatchingPursuitCV": "data", "OrthogonalMatchingPursuit": "data",
98
+ "MultiTaskLassoCV": "data", "MultiTaskLasso": "data",
99
+ "MultiTaskElasticNetCV": "data", "MultiTaskElasticNet": "data",
100
+ "LassoLarsIC": "data", "LassoLarsCV": "data", "LassoLars": "data",
101
+ "LassoCV": "data", "LarsCV": "data", "Lars": "data",
102
+ "HuberRegressor": "data", "GammaRegressor": "data",
103
+ "ElasticNetCV": "data", "ElasticNet": "data",
104
+ "LogisticRegression": "data", "RidgeClassifier": "data",
105
+ "RidgeClassifierCV": "data", "SGDClassifier": "data",
106
+ "PassiveAggressiveClassifier": "data", "Perceptron": "data",
107
+ "LogisticRegressionCV": "data"}
91
108
 
92
109
  self._UAF_TRAINABLE_FUNCS = {"ArimaEstimate", "LinearRegr", "MAMean",
93
110
  "MultivarRegr", "SimpleExp"}
@@ -120,8 +137,34 @@ class _BaseSearch:
120
137
  'MACRO-F1': True,
121
138
  'WEIGHTED-PRECISION': True,
122
139
  'WEIGHTED-RECALL': True,
123
- 'WEIGHTED-F1': True}
124
-
140
+ 'WEIGHTED-F1': True,
141
+ 'SILHOUETTE': True,
142
+ 'CALINSKI': True,
143
+ 'DAVIES': True}
144
+
145
+ # OpenSource ML function comparator (excluding MPD, MGD, MTD, RMSE, RMSLE)
146
+ self.__osml_func_comparator = {k: v for k, v in self.__func_comparator.items()
147
+ if k not in ['MPD', 'MGD', 'MTD', 'RMSE', 'RMSLE']}
148
+
149
+ # Linear model categorization lists for sklearn models
150
+ self._LINEAR_REGRESSION_MODELS = {
151
+ "ARDRegression", "BayesianRidge", "TweedieRegressor", "TheilSenRegressor",
152
+ "SGDRegressor", "RidgeCV", "Ridge", "RANSACRegressor", "PoissonRegressor",
153
+ "PassiveAggressiveRegressor", "OrthogonalMatchingPursuitCV", "OrthogonalMatchingPursuit",
154
+ "MultiTaskLassoCV", "MultiTaskLasso", "MultiTaskElasticNetCV", "MultiTaskElasticNet",
155
+ "LinearRegression", "LassoLarsIC", "LassoLarsCV", "LassoLars", "LassoCV",
156
+ "Lasso", "LarsCV", "Lars", "HuberRegressor", "GammaRegressor",
157
+ "ElasticNetCV", "ElasticNet"
158
+ }
159
+
160
+ self._LINEAR_CLASSIFICATION_MODELS = {
161
+ "SGDClassifier", "RidgeClassifierCV", "RidgeClassifier", "Perceptron",
162
+ "PassiveAggressiveClassifier", "LogisticRegressionCV", "LogisticRegression"
163
+ }
164
+
165
+ self._CLUSTERING_MODELS = {
166
+ "KMeans", "GaussianMixture"
167
+ }
125
168
  self.__func = func
126
169
  self.__params = params
127
170
  # "self.__best_model" contains best model.
@@ -178,47 +221,67 @@ class _BaseSearch:
178
221
  # '__parallel_stop_event' is used to stop threads in parallel execution.
179
222
  self.__parallel_stop_event = None
180
223
 
181
- # Get the function name.
182
- self.__func_name = func._tdml_valib_name if "_VALIB" in str(func.__class__) \
183
- else func.__name__
184
-
224
+
185
225
  # Set the function feature type and supported functionality.
186
226
  self.__is_sqle_function = False
187
227
  self.__is_uaf_function = False
188
228
  self.__is_val_function = True if "valib" in str(self.__func.__module__)\
189
229
  else False
190
-
191
- if self.__func_name in self._VAL_TRAINABLE_FUNCS and self.__is_val_function:
192
- # TODO: Enable these feature once merge model supports VAL functions.
193
- # This case is for VAL model trainer functions.
194
- self.__is_trainable = self.__is_evaluatable = \
195
- self.__is_predictable = False
196
- elif self.__func_name in self._UAF_TRAINABLE_FUNCS:
197
- # TODO: Enable these feature once merge model supports UAF functions.
198
- # This case is for UAF model trainer functions.
199
- self.__is_uaf_function = self.__is_trainable = \
200
- self.__is_evaluatable = False
201
- self.__is_predictable = False
202
- elif self.__func_name in self._SQLE_TRAINABLE_FUNCS:
203
- # This case is for SQLE model trainer functions.
204
- self.__is_sqle_function = self.__is_trainable = \
205
- self.__is_evaluatable = self.__is_predictable = True
230
+ self.__is_opensource_model = False
231
+ self.__is_clustering_model = False
232
+ self.__is_regression_model = False
233
+ self.__is_classification_model = False
234
+ self.model_id_counter = {}
235
+
236
+ # Import sklearn wrapper class for proper type checking
237
+ from teradataml.opensource._sklearn import _SkLearnObjectWrapper
238
+
239
+ if hasattr(func, "modelObj") and isinstance(func, _SkLearnObjectWrapper):
240
+ self.__is_opensource_model = True
241
+ self.__is_trainable = True
242
+ self.__is_evaluatable = True
243
+ self.__is_predictable = True
244
+
245
+ # Set the function name and class
246
+ self.__func_name = func.modelObj.__class__.__name__ # e.g., 'KMeans'
247
+ self.__func = func.__class__
248
+ if self.__func_name in self._CLUSTERING_MODELS:
249
+ self.__is_clustering_model = True
250
+ self.__is_evaluatable = False
251
+ elif self.__func_name in self._LINEAR_REGRESSION_MODELS:
252
+ self.__is_regression_model = True
253
+ elif self.__func_name in self._LINEAR_CLASSIFICATION_MODELS:
254
+ self.__is_classification_model = True
206
255
  else:
207
- # This case is for non-model trainer functions.
208
- self.__is_trainable = self.__is_evaluatable = \
209
- self.__is_predictable = False
210
-
211
-
212
- # Unsupervised model cannot perform evaluation. So, disable evaluation
213
- # functionality.
214
- self.__is_evaluatable = False if not self.__is_evaluatable or \
215
- self.__func_name in self.__US_TRAINABLE_FUNCS else \
216
- True
217
-
256
+ self.__func_name = func._tdml_valib_name if "_VALIB" in str(func.__class__) \
257
+ else func.__name__
258
+ if self.__func_name in self._VAL_TRAINABLE_FUNCS and self.__is_val_function:
259
+ # TODO: Enable these feature once merge model supports VAL functions.
260
+ # This case is for VAL model trainer functions.
261
+ self.__is_trainable = self.__is_evaluatable = \
262
+ self.__is_predictable = False
263
+ elif self.__func_name in self._UAF_TRAINABLE_FUNCS:
264
+ # TODO: Enable these feature once merge model supports UAF functions.
265
+ # This case is for UAF model trainer functions.
266
+ self.__is_uaf_function = self.__is_trainable = \
267
+ self.__is_evaluatable = False
268
+ self.__is_predictable = False
269
+ elif self.__func_name in self._SQLE_TRAINABLE_FUNCS:
270
+ # This case is for SQLE model trainer functions.
271
+ self.__is_sqle_function = self.__is_trainable = \
272
+ self.__is_evaluatable = self.__is_predictable = True
273
+ else:
274
+ # This case is for non-model trainer functions.
275
+ self.__is_trainable = self.__is_evaluatable = \
276
+ self.__is_predictable = False
277
+
278
+ self.__is_evaluatable = False if not self.__is_evaluatable or \
279
+ self.__func_name in self.__US_TRAINABLE_FUNCS else \
280
+ True
218
281
  # Set train routine based on model type.
219
282
  # Non-model trainer routine is used for unsupervised model function training.
220
283
  self._execute_fit = self.__model_trainer_routine if self.__is_trainable \
221
- and self.__is_evaluatable else \
284
+ and (self.__is_evaluatable or self.__is_clustering_model) else \
222
285
  self.__non_model_trainer_routine
223
286
 
224
287
  # Utility lambda functions.
@@ -266,6 +329,9 @@ class _BaseSearch:
266
329
  self._get_model_trainer_train_data_arg = lambda : "train_data" if \
267
330
  self.__func_name == "KNN" else "data"
268
331
 
332
+ # '_get_predict_column' function is used to generate prediction column name.
333
+ self._get_predict_column = lambda: f"{self.__func_name.lower()}_predict_1"
334
+
269
335
  if self.__is_trainable and "data" in self.__params:
270
336
  data = self.__params.pop("data")
271
337
  self.__validate_model_trainer_input_data_argument(data, False)
@@ -545,7 +611,6 @@ class _BaseSearch:
545
611
  """
546
612
  return self.__sampled_df_mapper[self.__best_data_id]
547
613
 
548
-
549
614
  @property
550
615
  def best_data_id(self):
551
616
  """
@@ -592,7 +657,7 @@ class _BaseSearch:
592
657
 
593
658
  """
594
659
 
595
- if not self.__is_evaluatable:
660
+ if not (self.__is_evaluatable or self.__is_clustering_model):
596
661
  # Raise error when "model_stats" attribute accessed for non-executable
597
662
  # functions.
598
663
  err = Messages.get_message(MessageCodes.EXECUTION_FAILED,
@@ -635,7 +700,6 @@ class _BaseSearch:
635
700
 
636
701
  return self.__model_stats
637
702
 
638
-
639
703
  def is_running(self):
640
704
  """
641
705
  DESCRIPTION:
@@ -665,7 +729,6 @@ class _BaseSearch:
665
729
  # both parallel and sequential execution.
666
730
  return self.__is_model_training_completed()
667
731
 
668
-
669
732
  def _add_data_label(self, arg_name=None):
670
733
  """
671
734
  DESCRIPTION:
@@ -765,7 +828,6 @@ class _BaseSearch:
765
828
 
766
829
  return _labeled_data
767
830
 
768
-
769
831
  def __perform_train_test_sampling(self, data, frac, stratify_column=None,
770
832
  sample_id_column=None, sample_seed=None):
771
833
  """
@@ -995,8 +1057,71 @@ class _BaseSearch:
995
1057
  # Validate DataFrames.
996
1058
  arg_info_matrix.append(["data", data, is_optional_arg, (DataFrame)])
997
1059
  _Validators._validate_function_arguments(arg_info_matrix)
1060
+
1061
+ def _regression_metrics(self, y_true, y_pred):
1062
+ from teradataml import td_sklearn as skl
998
1063
 
999
-
1064
+ ME = skl.max_error(y_true=y_true, y_pred=y_pred)
1065
+
1066
+ MAE = skl.mean_absolute_error(y_true=y_true, y_pred=y_pred)
1067
+
1068
+ MSE = skl.mean_squared_error(y_true=y_true, y_pred=y_pred, squared=False)
1069
+
1070
+ try:
1071
+ MSLE = skl.mean_squared_log_error(y_true=y_true, y_pred=y_pred)
1072
+ except:
1073
+ MSLE = "NA"
1074
+
1075
+ MAPE = skl.mean_absolute_percentage_error(y_true=y_true, y_pred=y_pred)
1076
+
1077
+ R2 = skl.r2_score(y_true=y_true, y_pred=y_pred)
1078
+
1079
+ EV = skl.explained_variance_score(y_true=y_true, y_pred=y_pred)
1080
+
1081
+ MAD = skl.median_absolute_error(y_true=y_true, y_pred=y_pred)
1082
+
1083
+ #TODO: Support for MPD, MGD, MTD will be added in next phase.
1084
+ # Support for RMSE, RMSLE will be added after OpenSourceML scikit-learn version
1085
+ # update as it requires higher version(>1.1.3)
1086
+ """MPD = skl.mean_poisson_deviance(y_true, y_pred)
1087
+ MGD = skl.mean_gamma_deviance(y_true, y_pred)
1088
+ MTD = skl.mean_tweedie_deviance(y_true, y_pred)"""
1089
+
1090
+ keys = ["MAE", "MSE", "MSLE", "MAPE", "R2", "EV", "ME", "MAD"]
1091
+ values = [MAE, MSE, MSLE, MAPE, R2, EV, ME, MAD]
1092
+ return dict(zip(keys, values))
1093
+
1094
+ def _classification_metrics(self, y_true, y_pred):
1095
+ from teradataml import td_sklearn as skl
1096
+
1097
+ # Basic classification metrics
1098
+ accuracy = skl.accuracy_score(y_true=y_true, y_pred=y_pred)
1099
+
1100
+ # Precision, Recall, F1 (micro, macro, weighted averages)
1101
+ micro_precision = skl.precision_score(y_true=y_true, y_pred=y_pred, average='micro')
1102
+ micro_recall = skl.recall_score(y_true=y_true, y_pred=y_pred, average='micro')
1103
+ micro_f1 = skl.f1_score(y_true=y_true, y_pred=y_pred, average='micro')
1104
+
1105
+ macro_precision = skl.precision_score(y_true=y_true, y_pred=y_pred, average='macro')
1106
+ macro_recall = skl.recall_score(y_true=y_true, y_pred=y_pred, average='macro')
1107
+ macro_f1 = skl.f1_score(y_true=y_true, y_pred=y_pred, average='macro')
1108
+
1109
+ weighted_precision = skl.precision_score(y_true=y_true, y_pred=y_pred, average='weighted')
1110
+ weighted_recall = skl.recall_score(y_true=y_true, y_pred=y_pred, average='weighted')
1111
+ weighted_f1 = skl.f1_score(y_true=y_true, y_pred=y_pred, average='weighted')
1112
+
1113
+ keys = [
1114
+ "ACCURACY", "MICRO-PRECISION", "MICRO-RECALL", "MICRO-F1",
1115
+ "MACRO-PRECISION", "MACRO-RECALL", "MACRO-F1",
1116
+ "WEIGHTED-PRECISION", "WEIGHTED-RECALL", "WEIGHTED-F1"
1117
+ ]
1118
+ values = [
1119
+ accuracy, micro_precision, micro_recall, micro_f1,
1120
+ macro_precision, macro_recall, macro_f1,
1121
+ weighted_precision, weighted_recall, weighted_f1
1122
+ ]
1123
+ return dict(zip(keys, values))
1124
+
1000
1125
  def fit(self,
1001
1126
  data=None,
1002
1127
  evaluation_metric=None,
@@ -1051,6 +1176,7 @@ class _BaseSearch:
1051
1176
  * evaluation_metric applicable for model trainer functions.
1052
1177
  * Best model is not selected when evaluation returns
1053
1178
  non-finite values.
1179
+ * MPD, MGD, RMSE, RMSLE are not supported for OpenSourceML models.
1054
1180
  Permitted Values:
1055
1181
  * Classification: Accuracy, Micro-Precision, Micro-Recall,
1056
1182
  Micro-F1, Macro-Precision, Macro-Recall,
@@ -1059,10 +1185,11 @@ class _BaseSearch:
1059
1185
  Weighted-F1.
1060
1186
  * Regression: MAE, MSE, MSLE, MAPE, MPE, RMSE, RMSLE, ME,
1061
1187
  R2, EV, MPD, MGD
1062
-
1188
+ * Clustering: SILHOUETTE
1063
1189
  Default Value:
1064
1190
  * Classification: Accuracy
1065
1191
  * Regression: MAE
1192
+ * Clustering: SILHOUETTE
1066
1193
  Types: str
1067
1194
 
1068
1195
  early_stop:
@@ -1241,7 +1368,9 @@ class _BaseSearch:
1241
1368
  arg_info_matrix.append(["run_parallel", run_parallel, True, (bool)])
1242
1369
  arg_info_matrix.append(["wait", wait, True, (bool)])
1243
1370
  arg_info_matrix.append(["evaluation_metric", evaluation_metric, True,
1244
- (str), True, list(self.__func_comparator)])
1371
+ (str), True, list(self.__osml_func_comparator)
1372
+ if self.__is_opensource_model
1373
+ else list(self.__func_comparator)])
1245
1374
  arg_info_matrix.append(["verbose", verbose, True, (int), True, [0,1,2]])
1246
1375
  arg_info_matrix.append(["max_time", max_time, True, (int, float)])
1247
1376
 
@@ -1260,8 +1389,8 @@ class _BaseSearch:
1260
1389
 
1261
1390
  # When "evaluation_metric" is 'MPE' then use the spl comparators.
1262
1391
  if self.__evaluation_metric == "MPE":
1263
- self._is_best_metrics = self._is_early_stoppable = self._spl_abs_comparator
1264
-
1392
+ self._is_best_metrics = self._is_early_stoppable = self._spl_abs_comparator
1393
+
1265
1394
  if not isinstance(self.__model_trainer_input_data, dict):
1266
1395
  # Sample all the labeled data for model training and testing.
1267
1396
  self.__perform_train_test_sampling(self._labeled_data, frac, stratify_column,
@@ -1277,6 +1406,27 @@ class _BaseSearch:
1277
1406
 
1278
1407
  self.__eval_params = kwargs if self.__is_evaluatable else None
1279
1408
 
1409
+ elif self.__is_trainable and self.__is_opensource_model:
1410
+
1411
+ if self.__is_clustering_model:
1412
+ self.__sampled_df_mapper = self._add_data_label("data")
1413
+ # Update model trainer function parameter grid.
1414
+ self.__update_model_parameters()
1415
+ elif self.__is_regression_model or self.__is_classification_model:
1416
+ # Open-source regression model: perform train-test split
1417
+
1418
+ if not isinstance(self.__model_trainer_input_data, dict):
1419
+ self.__perform_train_test_sampling(self._labeled_data, frac, stratify_column,
1420
+ sample_id_column, sample_seed)
1421
+ elif isinstance(self.__model_trainer_input_data, dict):
1422
+ self.__perform_train_test_sampling(self.__model_trainer_input_data, frac,
1423
+ stratify_column, sample_id_column,
1424
+ sample_seed)
1425
+ # Set evaluation parameters for supervised models
1426
+ self.__eval_params = kwargs if self.__is_evaluatable else None
1427
+
1428
+ self.__update_model_parameters()
1429
+
1280
1430
  elif self.__is_trainable and not self.__is_evaluatable:
1281
1431
  # This condition identifies unsupervised model trainer function.
1282
1432
  # Let's process training data.
@@ -1285,13 +1435,14 @@ class _BaseSearch:
1285
1435
  self.__sampled_df_mapper = self._add_data_label("data")
1286
1436
  # Update model trainer function parameter grid.
1287
1437
  self.__update_model_parameters()
1288
-
1289
1438
  # Initialize logging.
1290
1439
  if verbose > 0:
1291
1440
  self.__progress_bar = _ProgressBar(jobs=len(self._parameter_grid), verbose=verbose)
1441
+
1292
1442
  # With VT option Parallel execution won't be possible, as it opens multiple connections.
1293
1443
  if not run_parallel or configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE:
1294
1444
  # Setting start time of Sequential execution.
1445
+
1295
1446
  self.__start_time = time.time() if self.__timeout is not None else None
1296
1447
  # TODO: Factorize the code once parallel execution part is completed in ELE-6154 JIRA.
1297
1448
  # Execute all parameters from populated parameter grid for both trainable
@@ -1301,7 +1452,7 @@ class _BaseSearch:
1301
1452
 
1302
1453
  # Condition to check early stop feature applicable for model
1303
1454
  # trainer function.
1304
- if self.__early_stop is not None and self.__is_evaluatable:
1455
+ if self.__early_stop is not None and (self.__is_evaluatable or self.__is_clustering_model):
1305
1456
  if self.__is_finite and self._is_early_stoppable():
1306
1457
  # Terminate HPT execution when the trained model attains the
1307
1458
  # given "early_stop" value.
@@ -1390,28 +1541,44 @@ class _BaseSearch:
1390
1541
  EXAMPLES:
1391
1542
  >>> self.__model_trainer_routine(param=param, iter=iter, **kwargs)
1392
1543
  """
1393
-
1394
1544
  # Define model name used for model metadata.
1545
+
1395
1546
  model_name = self._generate_model_name(iter)
1396
1547
  # Get the unique data identifier present in "model_param".
1397
1548
  _data_id = model_param[self.__DATA_ID]
1398
1549
  # 'param' variable holds model training parameters and train dataframe.
1399
1550
  # Get the model training parameters.
1400
- param = model_param["param"]
1401
-
1551
+
1552
+ if self.__is_opensource_model:
1553
+ param_outer = model_param.get("param", {})
1554
+ param = param_outer.get("param", param_outer)
1555
+ data_input = param.pop("data", None)
1556
+ param = {k: v for k, v in param.items() if k != "data"}
1557
+ else:
1558
+ param = model_param["param"]
1559
+ data_input = None
1560
+
1402
1561
  # Check the stop_event set or not
1403
1562
  if self.__parallel_stop_event is not None and self.__parallel_stop_event.is_set():
1404
1563
  # Update the model metadata for Skip execution.
1405
- self.__update_model_metadata(model_name, param, "SKIP", 0, _data_id)
1564
+ self.__update_model_metadata(model_name, param, "SKIP", 0, 0, 0, _data_id)
1406
1565
  return
1407
1566
 
1408
1567
  # Retrieve the train and test data using data identifier.
1409
- _train_data, _test_data = self.__sampled_df_mapper[_data_id]
1410
-
1411
- # Update model training argument with train DataFrame.
1412
- param.update(_train_data)
1413
- # Update the test DataFrame for model evaluation.
1414
- kwargs.update(_test_data)
1568
+ if self.__is_opensource_model:
1569
+
1570
+ if self.__is_clustering_model:
1571
+ _train_data = self.__sampled_df_mapper[_data_id]
1572
+ _test_data = {} # No label needed
1573
+ elif self.__is_regression_model or self.__is_classification_model:
1574
+ _train_data, _test_data = self.__sampled_df_mapper[_data_id]
1575
+ kwargs.update(_test_data)
1576
+ else:
1577
+ _train_data, _test_data = self.__sampled_df_mapper[_data_id]
1578
+ # Update model training argument with train DataFrame.
1579
+ param.update(_train_data)
1580
+ # Update the test DataFrame for model evaluation.
1581
+ kwargs.update(_test_data)
1415
1582
 
1416
1583
  try:
1417
1584
  # Record starting time of model training.
@@ -1421,44 +1588,122 @@ class _BaseSearch:
1421
1588
  # using getattr method.
1422
1589
  self.__func = valib.__getattr__(self.__func_name)
1423
1590
  # Train the model.
1424
- func_obj = self.__func(**param)
1425
- # Evaluate the trained model.
1426
- evaluations = func_obj.evaluate(**kwargs)
1591
+ if self.__is_opensource_model:
1592
+ from teradataml import td_sklearn as skl
1593
+ func_class = getattr(skl, self.__func_name) # e.g., skl.KMeans
1594
+ if self.__is_regression_model or self.__is_classification_model:
1595
+ # Extract and remove only for regression models
1596
+ self.__input_columns = param.pop("input_columns", None)
1597
+ self.__response_column = param.pop("response_column", None)
1598
+
1599
+ func_obj = func_class(**param) # Safely create model instance
1600
+ else:
1601
+ func_obj = self.__func(**param)
1602
+ end_time = time.perf_counter()
1603
+ training_time = round((end_time - start_time), 3)
1427
1604
  # Store the trained object.
1428
1605
  self.__trained_models[model_name] = func_obj
1429
- # Process training time.
1430
- training_time = round((time.perf_counter() - start_time), 3)
1606
+
1607
+ if self.__is_opensource_model and self.__is_clustering_model:
1608
+ start_time_cluster = time.perf_counter()
1609
+ from teradataml import td_sklearn as skl
1610
+ feature_cols = [col for col in _train_data["data"].columns]
1611
+ func_obj.fit(data=_train_data["data"], feature_columns=feature_cols)
1612
+ pred_col = self._get_predict_column()
1613
+ result = func_obj.predict(data=_train_data["data"], feature_columns=feature_cols)
1614
+ result.materialize()
1431
1615
 
1432
- # Extract evaluations report in dictionary format.
1433
- if "RegressionEvaluator" in type(evaluations).__name__:
1434
- # RegressionEvaluator results are stored under "result" attribute.
1435
- # "result" dataframe column names are metrics and corresponding
1436
- # rows are evaluation values.
1437
- columns = evaluations.result.keys()
1438
- eval_values = evaluations.result.get_values()[0]
1616
+ silhouette = skl.silhouette_score(
1617
+ X=result.select(feature_cols),
1618
+ labels=result.select([pred_col])
1619
+ )
1620
+
1621
+ calinski = skl.calinski_harabasz_score(
1622
+ X=result.select(feature_cols),
1623
+ labels=result.select([pred_col])
1624
+ )
1625
+
1626
+ davies = skl.davies_bouldin_score(
1627
+ X=result.select(feature_cols),
1628
+ labels=result.select([pred_col])
1629
+ )
1630
+
1631
+ columns = ["SILHOUETTE", "CALINSKI", "DAVIES"]
1632
+ eval_values = [silhouette, calinski, davies]
1633
+ eval_key_values = dict(zip(columns, eval_values))
1634
+
1635
+ end_time_cluster = time.perf_counter()
1636
+ training_time_cluster = round((end_time_cluster - start_time_cluster), 3)
1439
1637
 
1440
- # Default evaluation metric is set to "MAE" for Regression models.
1441
1638
  if self.__evaluation_metric is None:
1442
- self.__evaluation_metric = "MAE"
1443
-
1639
+ self.__evaluation_metric = "SILHOUETTE"
1640
+
1641
+ self.__update_model_metadata(model_name, param, "PASS", training_time_cluster,
1642
+ end_time_cluster, start_time_cluster, _data_id, eval_key_values)
1643
+ elif self.__is_opensource_model and (self.__is_regression_model or self.__is_classification_model):
1644
+ start_time_lin = time.perf_counter()
1645
+ train_df = _train_data["data"]
1646
+ y = train_df.select([self.__response_column])
1647
+ X = train_df.drop(columns=[self.__response_column], axis=1)
1648
+
1649
+ func_obj.fit(X,y)
1650
+ pred_col = self._get_predict_column()
1651
+
1652
+ output = func_obj.predict(X,y)
1653
+
1654
+ y_true = output.select([self.__response_column])
1655
+ y_pred = output.select([pred_col])
1656
+
1657
+ if self.__is_regression_model:
1658
+ eval_key_values = self._regression_metrics(y_true, y_pred)
1659
+ if self.__evaluation_metric is None:
1660
+ self.__evaluation_metric = "MAE"
1661
+ elif self.__is_classification_model:
1662
+ eval_key_values = self._classification_metrics(y_true, y_pred)
1663
+ if self.__evaluation_metric is None:
1664
+ self.__evaluation_metric = "ACCURACY"
1665
+
1666
+ end_time_lin = time.perf_counter()
1667
+ training_time_lin = round((end_time_lin - start_time_lin), 3)
1668
+
1669
+ self.__update_model_metadata(model_name, param, "PASS", training_time_lin,
1670
+ end_time_lin, start_time_lin, _data_id, eval_key_values)
1444
1671
  else:
1445
- # ClassificationEvaluator results are stored under "output_data"
1446
- # attribute. "output_data" dataframe 'column 1' contains metrics
1447
- # and 'column 2' holds corresponding evaluation values.
1448
- eval_report = evaluations.output_data.get_values().transpose()
1449
- columns = eval_report[1].astype('str')
1450
- columns = [column_name.upper() for column_name in columns]
1451
- eval_values = eval_report[2]
1452
-
1453
- # Default evaluation metric is set to "ACCURACY" for
1454
- # classification models.
1455
- if self.__evaluation_metric is None:
1456
- self.__evaluation_metric = "ACCURACY"
1457
-
1458
- # Update the model metadata for successful model training.
1459
- self.__update_model_metadata(model_name, param, "PASS",
1460
- training_time, _data_id,
1461
- columns, eval_values)
1672
+ # Evaluate the trained model.
1673
+ evaluations = func_obj.evaluate(**kwargs)
1674
+ # Extract evaluations report in dictionary format.
1675
+ if "RegressionEvaluator" in type(evaluations).__name__:
1676
+ # RegressionEvaluator results are stored under "result" attribute.
1677
+ # "result" dataframe column names are metrics and corresponding
1678
+ # rows are evaluation values.
1679
+ columns = evaluations.result.keys()
1680
+ eval_values = evaluations.result.get_values()[0]
1681
+
1682
+ # Default evaluation metric is set to "MAE" for Regression models.
1683
+ if self.__evaluation_metric is None:
1684
+ self.__evaluation_metric = "MAE"
1685
+
1686
+ else:
1687
+ # ClassificationEvaluator results are stored under "output_data"
1688
+ # attribute. "output_data" dataframe 'column 1' contains metrics
1689
+ # and 'column 2' holds corresponding evaluation values.
1690
+ eval_report = evaluations.output_data.get_values().transpose()
1691
+ columns = eval_report[1].astype('str')
1692
+ columns = [column_name.upper() for column_name in columns]
1693
+ eval_values = eval_report[2]
1694
+
1695
+ # Default evaluation metric is set to "ACCURACY" for
1696
+ # classification models.
1697
+ if self.__evaluation_metric is None:
1698
+ self.__evaluation_metric = "ACCURACY"
1699
+
1700
+ # Combine columns and eval_values into a dictionary
1701
+ eval_key_values = dict(zip(columns, eval_values))
1702
+ # Update the model metadata for successful model training.
1703
+ self.__update_model_metadata(model_name, param, "PASS",
1704
+ training_time, end_time, start_time,
1705
+ _data_id, eval_key_values)
1706
+
1462
1707
 
1463
1708
  # Check whether self.__parallel_stop_event is None or not
1464
1709
  if self.__parallel_stop_event is not None:
@@ -1468,18 +1713,18 @@ class _BaseSearch:
1468
1713
  if (self.__early_stop is not None and self._is_early_stoppable())\
1469
1714
  or (self.__timeout is not None and self._is_time_stoppable()):
1470
1715
  self.__parallel_stop_event.set()
1471
-
1716
+
1472
1717
  except Exception as _err_msg:
1473
1718
  # Record error message with corresponding "model_name".
1474
1719
  self.__model_err_records[model_name] = str(_err_msg)
1475
1720
  # Compute the failed execution time for failed training.
1476
- training_time = round((time.perf_counter() - start_time), 3)
1721
+ end_time = time.perf_counter()
1722
+ training_time = round((end_time - start_time), 3)
1477
1723
  # Update the model metadata for failed execution.
1478
- self.__update_model_metadata(model_name, param, "FAIL", training_time,
1479
- _data_id)
1724
+ self.__update_model_metadata(model_name, param, "FAIL", training_time,
1725
+ end_time, start_time, _data_id)
1480
1726
  pass
1481
1727
 
1482
-
1483
1728
  def __non_model_trainer_routine(self, model_param, iter, **kwargs):
1484
1729
  """
1485
1730
  DESCRIPTION:
@@ -1549,7 +1794,7 @@ class _BaseSearch:
1549
1794
  # Check the stop_event set or not
1550
1795
  if self.__parallel_stop_event is not None and self.__parallel_stop_event.is_set():
1551
1796
  # Update the model metadata for Skip execution.
1552
- self.__update_model_metadata(model_name, param, "SKIP", 0, _data_id)
1797
+ self.__update_model_metadata(model_name, param, "SKIP", 0, 0, 0, _data_id)
1553
1798
  return
1554
1799
  try:
1555
1800
  # Record starting time of model training.
@@ -1566,17 +1811,19 @@ class _BaseSearch:
1566
1811
  self.__trained_models[model_name] = func_obj
1567
1812
 
1568
1813
  # Process training time.
1569
- training_time = round((time.perf_counter() - start_time), 3)
1814
+ end_time = time.perf_counter()
1815
+ training_time = round((end_time - start_time), 3)
1570
1816
  # Update the model metadata for successful model training.
1571
1817
 
1572
- self.__update_model_metadata(model_name, param, "PASS", training_time, _data_id)
1818
+ self.__update_model_metadata(model_name, param, "PASS", training_time, end_time, start_time, _data_id)
1573
1819
  except Exception as _err_msg:
1574
1820
  # Record error message with corresponding "model_name".
1575
1821
  self.__model_err_records[model_name] = str(_err_msg)
1576
1822
  # Compute the failed execution time for failed training.
1577
- training_time = round((time.perf_counter() - start_time), 3)
1823
+ end_time = time.perf_counter()
1824
+ training_time = round((end_time - start_time), 3)
1578
1825
  # Update the model metadata for failed execution.
1579
- self.__update_model_metadata(model_name, param, "FAIL", training_time, _data_id)
1826
+ self.__update_model_metadata(model_name, param, "FAIL", training_time, end_time, start_time, _data_id)
1580
1827
  pass
1581
1828
 
1582
1829
  if self.__parallel_stop_event is not None:
@@ -1586,14 +1833,14 @@ class _BaseSearch:
1586
1833
  self.__parallel_stop_event.set()
1587
1834
 
1588
1835
 
1589
-
1590
1836
  def __update_model_metadata(self, model_name,
1591
1837
  param,
1592
1838
  status,
1593
1839
  training_time,
1840
+ end_time,
1841
+ start_time,
1594
1842
  data_id=None,
1595
- columns=None,
1596
- eval_values=None):
1843
+ eval_key_values=None):
1597
1844
  """
1598
1845
  DESCRIPTION:
1599
1846
  Internal function to update the model evaluation details, that are
@@ -1620,33 +1867,35 @@ class _BaseSearch:
1620
1867
  * SKIP: Function execution skipped for the chosen parameters.
1621
1868
  Types: str
1622
1869
 
1623
- data_id:
1624
- Optional Argument.
1625
- Specifies the unique data identifier used for model training.
1626
- Note:
1627
- * "data_id" is supported for model trainer functions.
1628
- Types: str
1629
-
1630
1870
  training_time:
1631
1871
  Required Argument.
1632
1872
  Specifies the model training time in seconds for both model trainer
1633
1873
  function and non-model trainer function.
1634
1874
  Types: float
1635
1875
 
1636
- columns:
1876
+ end_time:
1637
1877
  Optional Argument.
1638
- Specifies the column names retrieved from model evaluation
1639
- phase. This argument is a required argument for model trainer
1640
- function.
1641
- Types: list of string
1642
-
1643
- eval_values:
1878
+ Specifies the end time of the model training.
1879
+ Types: float
1880
+
1881
+ start_time:
1644
1882
  Optional Argument.
1645
- Specifies the evaluation results retrieved from model evaluation
1646
- phase. This argument is a required argument for model trainer
1647
- function.
1648
- Types: list of float
1883
+ Specifies the start time of the model training.
1884
+ Types: float
1649
1885
 
1886
+ data_id:
1887
+ Optional Argument.
1888
+ Specifies the unique data identifier used for model training.
1889
+ Note:
1890
+ * "data_id" is supported for model trainer functions.
1891
+ Types: str
1892
+
1893
+ eval_key_values:
1894
+ Optional Argument.
1895
+ Specifies the evaluation key values retrieved from model evaluation
1896
+ phase. This argument is a required argument for model trainer
1897
+ function.
1898
+ Types: dict.
1650
1899
 
1651
1900
  RETURNS:
1652
1901
  None
@@ -1672,17 +1921,21 @@ class _BaseSearch:
1672
1921
  model_metadata[self.__DATA_ID.upper()] = data_id
1673
1922
 
1674
1923
  # Format log message needs to displayed.
1675
- _msg = "Model_id:{},Run time:{}s,Status:{}".format(model_name,
1676
- training_time,
1924
+ _msg = "Model_id:{}, Run time:{}s, Start time:{}, End time:{}, Status:{}".format(model_name,
1925
+ training_time,
1926
+ start_time,
1927
+ end_time,
1677
1928
  status)
1678
1929
 
1679
-
1680
- if status == "PASS" and self.__is_evaluatable :
1930
+ if status == "PASS" and (self.__is_evaluatable or self.__is_clustering_model):
1681
1931
  # While execution status is 'Fail' then update the evaluation result
1682
1932
  # with 'None' values.
1683
- model_scores = dict(zip(columns, eval_values))
1933
+ model_scores = eval_key_values
1684
1934
  model_metadata.update(model_scores)
1685
- # Add additional model score to the log message.
1935
+ # Add additional model score to the log message.
1936
+ if self.__is_opensource_model and (self.__evaluation_metric is None or self.__evaluation_metric not in model_scores):
1937
+ if "SILHOUETTE" in model_scores:
1938
+ self.__evaluation_metric = "SILHOUETTE"
1686
1939
  _msg += ",{}:{}".format(self.__evaluation_metric,round(
1687
1940
  model_scores[self.__evaluation_metric], 3))
1688
1941
  # Best model updation.
@@ -1757,18 +2010,46 @@ class _BaseSearch:
1757
2010
  # identifier is passed.
1758
2011
  if not self.__is_trainable or not self.__is_predictable:
1759
2012
  err = Messages.get_message(MessageCodes.EXECUTION_FAILED,
1760
- "execute 'predict()'","Not applicable for" \
1761
- " non-model trainer analytic functions.")
2013
+ "execute 'predict()'","Not applicable for" \
2014
+ " non-model trainer analytic functions.")
1762
2015
  raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
1763
2016
 
1764
2017
  if self.__default_model is None:
1765
2018
  err = Messages.get_message(MessageCodes.EXECUTION_FAILED,
1766
2019
  "execute 'predict()'",
1767
- "No model is set as default to set a "\
1768
- "prediction model use the 'set_model()' function.")
2020
+ "No model is set as default to set a "\
2021
+ "prediction model use the 'set_model()' function.")
1769
2022
 
1770
2023
  raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
1771
-
2024
+
2025
+ test_data = kwargs.get("newdata", None)
2026
+
2027
+ if self.__is_opensource_model and self.__is_clustering_model:
2028
+ if test_data is None:
2029
+ test_data = self.__sampled_df_mapper[self.__best_data_id]["data"]
2030
+ feature_columns = kwargs.get("feature_columns", None)
2031
+
2032
+ # If feature columns not passed, fetch from training data
2033
+ if feature_columns is None:
2034
+ if self.__best_data_id is None:
2035
+ err = Messages.get_message(MessageCodes.EXECUTION_FAILED,
2036
+ "fetch 'feature_columns'",
2037
+ "No training metadata found")
2038
+
2039
+ raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
2040
+ training_df = self.__sampled_df_mapper[self.__best_data_id]["data"]
2041
+ training_columns = training_df.columns
2042
+
2043
+ feature_columns = [col for col in training_columns]
2044
+
2045
+ return self.__default_model.predict(data=test_data, feature_columns=feature_columns)
2046
+ elif self.__is_opensource_model and (self.__is_regression_model or self.__is_classification_model):
2047
+ if test_data is None:
2048
+ test_data = self.__sampled_df_mapper[self.__best_data_id][1]["data"]
2049
+ y_test = test_data.select([self.__response_column])
2050
+ X_test = test_data.drop(columns=[self.__response_column], axis=1)
2051
+
2052
+ return self.__default_model.predict(X_test, y_test)
1772
2053
  # TODO Enable this method, once Merge model supports VAL, and UAF.
1773
2054
  return self.__default_model.predict(**kwargs)
1774
2055
 
@@ -1963,7 +2244,6 @@ class _BaseSearch:
1963
2244
  return self.__model_err_records.get(model_id)
1964
2245
 
1965
2246
 
1966
-
1967
2247
  def set_model(self, model_id):
1968
2248
  """
1969
2249
  DESCRIPTION:
@@ -2046,10 +2326,16 @@ class _BaseSearch:
2046
2326
  # Raise TeradataMLException error when non-model trainer function
2047
2327
  # identifier is passed.
2048
2328
  if not self.__is_trainable or not self.__is_evaluatable:
2049
- err = Messages.get_message(MessageCodes.EXECUTION_FAILED,
2050
- "execute 'evaluate()'","Not applicable for" \
2051
- " non-model trainer analytic functions.")
2052
- raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
2329
+ if not self.__is_clustering_model:
2330
+ err = Messages.get_message(MessageCodes.EXECUTION_FAILED,
2331
+ "execute 'evaluate()'","Not applicable for" \
2332
+ " non-model trainer analytic functions.")
2333
+ raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
2334
+ else:
2335
+ err = Messages.get_message(MessageCodes.EXECUTION_FAILED,
2336
+ "execute 'evaluate()'","Not applicable for" \
2337
+ " clustering model functions.")
2338
+ raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
2053
2339
 
2054
2340
  if self.__default_model is None:
2055
2341
  err = Messages.get_message(MessageCodes.EXECUTION_FAILED,
@@ -2058,11 +2344,35 @@ class _BaseSearch:
2058
2344
  "trained model for evaluation use "\
2059
2345
  "the 'set_model()' function.")
2060
2346
  raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
2061
-
2062
- _params = self.__eval_params if len(kwargs) == 0 else kwargs
2063
- if self._TRAINABLE_FUNCS_DATA_MAPPER[self.__func_name] not in _params:
2064
- _params.update(self.__sampled_df_mapper[self.__best_data_id][1])
2065
- return self.__default_model.evaluate(**_params)
2347
+ if self.__is_opensource_model and (self.__is_regression_model or self.__is_classification_model):
2348
+ test_data = kwargs.get("newdata", None)
2349
+
2350
+ if test_data is None:
2351
+ test_data = self.__sampled_df_mapper[self.__best_data_id][1]["data"]
2352
+
2353
+ y_test = test_data.select([self.__response_column])
2354
+ X_test = test_data.drop(columns=[self.__response_column], axis=1)
2355
+
2356
+ pred_col = self._get_predict_column()
2357
+
2358
+ output = self.__default_model.predict(X_test,y_test)
2359
+
2360
+ y_true = output.select([self.__response_column])
2361
+ y_pred = output.select([pred_col])
2362
+
2363
+ if self.__is_regression_model:
2364
+ eval_key_values = self._regression_metrics(y_true, y_pred)
2365
+ elif self.__is_classification_model:
2366
+ eval_key_values = self._classification_metrics(y_true, y_pred)
2367
+
2368
+ import pandas as pd
2369
+ result_df = pd.DataFrame([eval_key_values])
2370
+ return result_df
2371
+ else:
2372
+ _params = self.__eval_params if len(kwargs) == 0 else kwargs
2373
+ if self._TRAINABLE_FUNCS_DATA_MAPPER[self.__func_name] not in _params:
2374
+ _params.update(self.__sampled_df_mapper[self.__best_data_id][1])
2375
+ return self.__default_model.evaluate(**_params)
2066
2376
 
2067
2377
 
2068
2378
  def __populate_parameter_grid(self):
@@ -2255,6 +2565,8 @@ class _BaseSearch:
2255
2565
 
2256
2566
  if self.__is_trainable and self.__is_evaluatable and self.__is_sqle_function:
2257
2567
  self._labeled_data = self._add_data_label()
2568
+ elif self.__is_trainable and self.__is_evaluatable and not self.__is_clustering_model:
2569
+ self._labeled_data = self._add_data_label()
2258
2570
 
2259
2571
 
2260
2572
  class GridSearch(_BaseSearch):
@@ -2940,6 +3252,7 @@ class GridSearch(_BaseSearch):
2940
3252
  * evaluation_metric applicable for model trainer functions.
2941
3253
  * Best model is not selected when evaluation returns
2942
3254
  non-finite values.
3255
+ * MPD, MGD, RMSE, RMSLE are not supported for OpenSourceML models.
2943
3256
  Permitted Values:
2944
3257
  * Classification: Accuracy, Micro-Precision, Micro-Recall,
2945
3258
  Micro-F1, Macro-Precision, Macro-Recall,
@@ -3555,6 +3868,7 @@ class RandomSearch(_BaseSearch):
3555
3868
  * evaluation_metric applicable for model trainer functions.
3556
3869
  * Best model is not selected when evaluation returns
3557
3870
  non-finite values.
3871
+ * MPD, MGD, RMSE, RMSLE are not supported for OpenSourceML models.
3558
3872
  Permitted Values:
3559
3873
  * Classification: Accuracy, Micro-Precision, Micro-Recall,
3560
3874
  Micro-F1, Macro-Precision, Macro-Recall,