teradataml 20.0.0.6__py3-none-any.whl → 20.0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of teradataml has been flagged as possibly problematic.
Files changed (96)
  1. teradataml/README.md +210 -0
  2. teradataml/__init__.py +1 -1
  3. teradataml/_version.py +1 -1
  4. teradataml/analytics/analytic_function_executor.py +162 -76
  5. teradataml/analytics/byom/__init__.py +1 -1
  6. teradataml/analytics/json_parser/__init__.py +2 -0
  7. teradataml/analytics/json_parser/analytic_functions_argument.py +95 -2
  8. teradataml/analytics/json_parser/metadata.py +22 -4
  9. teradataml/analytics/sqle/DecisionTreePredict.py +3 -2
  10. teradataml/analytics/sqle/NaiveBayesPredict.py +3 -2
  11. teradataml/analytics/sqle/__init__.py +3 -0
  12. teradataml/analytics/utils.py +4 -1
  13. teradataml/automl/__init__.py +2369 -464
  14. teradataml/automl/autodataprep/__init__.py +15 -0
  15. teradataml/automl/custom_json_utils.py +184 -112
  16. teradataml/automl/data_preparation.py +113 -58
  17. teradataml/automl/data_transformation.py +154 -53
  18. teradataml/automl/feature_engineering.py +113 -53
  19. teradataml/automl/feature_exploration.py +548 -25
  20. teradataml/automl/model_evaluation.py +260 -32
  21. teradataml/automl/model_training.py +399 -206
  22. teradataml/clients/auth_client.py +2 -2
  23. teradataml/common/aed_utils.py +11 -2
  24. teradataml/common/bulk_exposed_utils.py +4 -2
  25. teradataml/common/constants.py +62 -2
  26. teradataml/common/garbagecollector.py +50 -21
  27. teradataml/common/messagecodes.py +47 -2
  28. teradataml/common/messages.py +19 -1
  29. teradataml/common/sqlbundle.py +23 -6
  30. teradataml/common/utils.py +116 -10
  31. teradataml/context/aed_context.py +16 -10
  32. teradataml/data/Employee.csv +5 -0
  33. teradataml/data/Employee_Address.csv +4 -0
  34. teradataml/data/Employee_roles.csv +5 -0
  35. teradataml/data/JulesBelvezeDummyData.csv +100 -0
  36. teradataml/data/byom_example.json +5 -0
  37. teradataml/data/creditcard_data.csv +284618 -0
  38. teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
  39. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +1 -1
  40. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +1 -1
  41. teradataml/data/docs/sqle/docs_17_20/TextParser.py +1 -1
  42. teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
  43. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +3 -7
  44. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +3 -7
  45. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +3 -7
  46. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +3 -7
  47. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +3 -7
  48. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +3 -7
  49. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +3 -7
  50. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +3 -7
  51. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +3 -7
  52. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +3 -7
  53. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +3 -7
  54. teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
  55. teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
  56. teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
  57. teradataml/data/load_example_data.py +29 -11
  58. teradataml/data/payment_fraud_dataset.csv +10001 -0
  59. teradataml/data/teradataml_example.json +67 -0
  60. teradataml/dataframe/copy_to.py +714 -54
  61. teradataml/dataframe/dataframe.py +1153 -33
  62. teradataml/dataframe/dataframe_utils.py +8 -3
  63. teradataml/dataframe/functions.py +168 -1
  64. teradataml/dataframe/setop.py +4 -1
  65. teradataml/dataframe/sql.py +141 -9
  66. teradataml/dbutils/dbutils.py +470 -35
  67. teradataml/dbutils/filemgr.py +1 -1
  68. teradataml/hyperparameter_tuner/optimizer.py +456 -142
  69. teradataml/lib/aed_0_1.dll +0 -0
  70. teradataml/lib/libaed_0_1.dylib +0 -0
  71. teradataml/lib/libaed_0_1.so +0 -0
  72. teradataml/lib/libaed_0_1_aarch64.so +0 -0
  73. teradataml/scriptmgmt/UserEnv.py +234 -34
  74. teradataml/scriptmgmt/lls_utils.py +43 -17
  75. teradataml/sdk/_json_parser.py +1 -1
  76. teradataml/sdk/api_client.py +9 -6
  77. teradataml/sdk/modelops/_client.py +3 -0
  78. teradataml/series/series.py +12 -7
  79. teradataml/store/feature_store/constants.py +601 -234
  80. teradataml/store/feature_store/feature_store.py +2886 -616
  81. teradataml/store/feature_store/mind_map.py +639 -0
  82. teradataml/store/feature_store/models.py +5831 -214
  83. teradataml/store/feature_store/utils.py +390 -0
  84. teradataml/table_operators/table_operator_util.py +1 -1
  85. teradataml/table_operators/templates/dataframe_register.template +6 -2
  86. teradataml/table_operators/templates/dataframe_udf.template +6 -2
  87. teradataml/utils/docstring.py +527 -0
  88. teradataml/utils/dtypes.py +93 -0
  89. teradataml/utils/internal_buffer.py +2 -2
  90. teradataml/utils/utils.py +41 -2
  91. teradataml/utils/validators.py +694 -17
  92. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/METADATA +213 -2
  93. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/RECORD +96 -81
  94. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/WHEEL +0 -0
  95. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/top_level.txt +0 -0
  96. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/zip-safe +0 -0
teradataml/automl/data_transformation.py

@@ -23,6 +23,7 @@ from teradataml.dataframe.copy_to import copy_to_sql
  from teradataml import Antiselect
  from teradataml import BincodeTransform
  from teradataml import ConvertTo
+ from teradataml import execute_sql
  from teradataml import FillRowId
  from teradataml import NonLinearCombineTransform
  from teradataml import OneHotEncodingTransform
@@ -32,7 +33,6 @@ from teradataml import ScaleTransform
  from teradataml import SimpleImputeTransform
  from teradataml import TargetEncodingTransform
  from teradataml import Transform, UtilFuncs, TeradataConstants
- from teradataml import execute_sql
  from teradataml.common.garbagecollector import GarbageCollector
  from teradataml.hyperparameter_tuner.utils import _ProgressBar
  from teradataml.options.configure import configure
@@ -48,10 +48,12 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
      def __init__(self,
                   data,
                   data_transformation_params,
-                  auto = True,
-                  verbose = 0,
-                  target_column_ind = False,
-                  table_name_mapping = {}):
+                  auto=True,
+                  verbose=0,
+                  target_column_ind=False,
+                  table_name_mapping={},
+                  cluster=False,
+                  feature_selection_method=None):
          """
          DESCRIPTION:
              Function initializes the data, data transformation object and running mode
@@ -89,7 +91,25 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
              Optional Argument.
              Specifies whether target column is present in given dataset.
              Default Value: False
-             Types: bool
+             Types: bool
+
+         table_name_mapping:
+             Optional Argument.
+             Specifies the mapping of table names for the transformed data.
+             Default Value: {}
+             Types: dict
+
+         cluster:
+             Optional Argument.
+             Specifies whether to apply clustering techniques.
+             Default Value: False
+             Types: bool
+
+         feature_selection_method:
+             Optional Argument.
+             Specifies the feature selection method to be used.
+             Default Value: None
+             Types: str
          """
          self.data = data
          self.data_transformation_params = data_transformation_params
@@ -97,9 +117,13 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
          self.verbose = verbose
          self.target_column_ind = target_column_ind
          self.table_name_mapping = table_name_mapping
+         self.data_types = {key: value for key, value in self.data._column_names_and_types}
          self.data_node_id = data._nodeid
          self.table_name_mapping[self.data_node_id] = {}

+         self.cluster = cluster
+         self.feature_selection_method = feature_selection_method
+
      def data_transformation(self):
          """
          DESCRIPTION:
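Taken together with the widened signature above, the clustering mode is wired in at construction time. A minimal sketch of how it might be driven (illustrative only; _DataTransformation is an internal AutoML class, and the argument values below are assumptions, not taken from this diff):

    # Hypothetical call site; train_df and fit_params are placeholders.
    transformer = _DataTransformation(
        data=train_df,                           # a teradataml DataFrame
        data_transformation_params=fit_params,   # dict captured during the fit phase
        auto=True,
        verbose=1,
        cluster=True,                    # new in 20.0.0.7: enables the clustering path
        feature_selection_method=None)   # new in 20.0.0.7: forwarded to data preparation
    table_map = transformer.data_transformation()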
@@ -112,15 +136,17 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
          """
          # Initializing Feature Exploration
          _FeatureExplore.__init__(self,
-                                  data = self.data,
-                                  target_column = None,
-                                  verbose = self.verbose)
+                                  data=self.data,
+                                  target_column=None,
+                                  verbose=self.verbose,
+                                  cluster=self.cluster)
          # Initializing Feature Engineering
-         _FeatureEngineering.__init__(self,
-                                      data = self.data,
-                                      target_column = None,
-                                      model_list = None,
-                                      verbose = self.verbose)
+         _FeatureEngineering.__init__(self,
+                                      data=self.data,
+                                      target_column=None,
+                                      model_list=None,
+                                      verbose=self.verbose,
+                                      cluster=self.cluster)

          self._display_msg(msg="Data Transformation started ...", show_data=True)
          # Extracting target column details and type whether it is classification or not
@@ -128,13 +154,14 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
          self.classification_type = self.data_transformation_params.get("classification_type", False)

          # Setting number of jobs for progress bar based on mode of execution
-         jobs = 10 if self.auto else 15
+         jobs = 9 if self.cluster else (10 if self.auto else 15)
          self.progress_bar = _ProgressBar(jobs=jobs, verbose=2, prefix='Transformation Running:')

          # Performing transformation carried out in feature engineering phase
          self.feature_engineering_transformation()
+
          # Performing transformation carried out in data preparation phase
-         self.data_preparation_transformation()
+         self.data_preparation_transformation(feature_selection_method=self.feature_selection_method)
          self._display_msg(msg="Data Transformation completed.", show_data=True)

          return self.table_name_mapping
@@ -157,8 +184,9 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
          self.progress_bar.update()

          # Handling target column transformation
-         if self.target_column_ind and self.classification_type:
-             self._handle_target_column_transformation()
+         if not self.cluster:
+             if self.target_column_ind and self.classification_type:
+                 self._handle_target_column_transformation()
          self.progress_bar.update()

          self._date_column_handling_transformation()
@@ -193,7 +221,7 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
          self._custom_anti_select_column_transformation()
          self.progress_bar.update()

-     def data_preparation_transformation(self):
+     def data_preparation_transformation(self, feature_selection_method=None):
          """
          DESCRIPTION:
              Function performs transformation carried out in data preparation phase
@@ -209,14 +237,21 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):

          # Performing transformation including feature selection using lasso, rfe and pca
          # followed by scaling
-         self._feature_selection_lasso_transformation()
-         self.progress_bar.update()
-
-         self._feature_selection_rfe_transformation()
-         self.progress_bar.update()
+         if not self.cluster:
+             self._feature_selection_lasso_transformation()
+             self.progress_bar.update()

-         self._feature_selection_pca_transformation()
-         self.progress_bar.update()
+             self._feature_selection_rfe_transformation()
+             self.progress_bar.update()
+
+             self._feature_selection_pca_transformation()
+             self.progress_bar.update()
+         else:
+             self._feature_selection_pca_transformation()
+             self.progress_bar.update()
+
+             self._feature_selection_non_pca_transformation()
+             self.progress_bar.update()

      def _preprocess_transformation(self):
          """
@@ -224,7 +259,7 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
              Function drops irrelevant columns and adds id column.
          """
          # Extracting irrelevant column list
-         columns_to_be_removed = self.data_transformation_params.get("drop_irrelevent_columns", None)
+         columns_to_be_removed = self.data_transformation_params.get("drop_irrelevant_columns", None)
          if columns_to_be_removed:
              self.data = self.data.drop(columns_to_be_removed, axis=1)
              self._display_msg(msg="\nUpdated dataset after dropping irrelevant columns :",
@@ -297,9 +332,20 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
          imputation_cols = self.data_transformation_params.get("imputation_columns", None)
          if imputation_cols:
              sm_fit_obj = self.data_transformation_params.get("imputation_fit_object")
+             ## Workaround done for bug https://teradata-pe.atlassian.net/browse/TDAF-15617.
+             # partition_column = self.data_transformation_params.get("imputation_partition_column", None)
+
+             params = {"data" : self.data,
+                       "object" : sm_fit_obj
+                       }
+
+             # if partition_column is not None:
+             #     params["data_partition_column"] = partition_column
+             #     params["object_partition_column"] = partition_column
+
              # imputing column using fit object
-             self.data = SimpleImputeTransform(data=self.data,
-                                               object=sm_fit_obj).result
+             self.data = SimpleImputeTransform(**params).result
+
              self._display_msg(msg="\nUpdated dataset after imputing missing value containing columns :",
                                data=self.data,
                                progress_bar=self.progress_bar)
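Once TDAF-15617 is resolved, the disabled partitioning would presumably be restored along these lines (a sketch reconstructed purely from the commented-out lines in the hunk above, not shipped code):

    # Sketch: SimpleImputeTransform with the partition columns re-enabled.
    # "imputation_partition_column" is the key used in the commented-out workaround.
    partition_column = self.data_transformation_params.get("imputation_partition_column", None)
    params = {"data": self.data, "object": sm_fit_obj}
    if partition_column is not None:
        params["data_partition_column"] = partition_column
        params["object_partition_column"] = partition_column
    self.data = SimpleImputeTransform(**params).result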
@@ -438,7 +484,34 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
              self._display_msg(msg="\nUpdated dataset after performing categorical encoding :",
                                data=self.data,
                                progress_bar=self.progress_bar)
+             return

+         # AutoFraud Routine
+         auto_target_encoding_ind = self.data_transformation_params.get("auto_target_encoding_ind", False)
+         auto_target_encoding_fit_obj = self.data_transformation_params.get("auto_target_encoding_fit_obj", None)
+         target_encoding_accumulate_columns = self.data_transformation_params.get("target_encoding_accumulate_columns")
+
+         if auto_target_encoding_ind:
+             # Adding transform parameters for performing encoding
+             transform_params = {
+                 "data" : self.data,
+                 "object" : auto_target_encoding_fit_obj,
+                 "accumulate" : target_encoding_accumulate_columns,
+                 "is_input_dense" : True,
+                 "persist" : True,
+                 "display_table_name" : False
+             }
+
+             # Performing target encoding transformation
+             self.data = TargetEncodingTransform(**transform_params).result
+
+             # Adding transformed data containing table to garbage collector
+             GarbageCollector._add_to_garbagecollector(self.data._table_name)
+
+             self._display_msg(msg="\nUpdated dataset after performing categorical encoding :",
+                               data=self.data,
+                               progress_bar=self.progress_bar)
+
      def _custom_categorical_encoding_transformation(self):
          """
          DESCRIPTION:
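The auto_target_encoding_fit_obj consumed by this routine would be produced by a matching fit step earlier in the pipeline. A minimal sketch, assuming the usual teradataml Fit/Transform pairing (the argument list is abridged, the column names are made up; consult the TargetEncodingFit documentation for the exact signature):

    from teradataml import TargetEncodingFit

    # Hypothetical fit step; "merchant_category" and "is_fraud" are placeholder columns.
    te_fit = TargetEncodingFit(data=train_df,
                               encoder_method="CBM_BETA",
                               target_columns=["merchant_category"],
                               response_column="is_fraud")
    data_transformation_params["auto_target_encoding_fit_obj"] = te_fit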
@@ -493,7 +566,7 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
                  warnings.warn(message=f"Unseen categorical values found in test data column(s): {warn_cols}. \
                      This may cause inaccurate predictions. Consider retraining the model with updated data.",
                      stacklevel=0)
-
+
          self._display_msg(msg="\nUpdated dataset after performing customized categorical encoding :",
                            data=self.data,
                            progress_bar=self.progress_bar)
@@ -628,7 +701,9 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
              for classification problem.
          """
          # Fetching target column encoding indicator and fit object
+
          target_col_encode_ind = self.data_transformation_params.get("target_col_encode_ind", False)
+
          if target_col_encode_ind:
              # Extracting ordinal encoding fit object for target column
              target_col_ord_encoding_fit_obj = self.data_transformation_params.get("target_col_ord_encoding_fit_obj", None)
@@ -647,14 +722,7 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
              self.data = OrdinalEncodingTransform(**transform_params).result
              # Adding transformed data containing table to garbage collector
              GarbageCollector._add_to_garbagecollector(self.data._table_name)
-             # Converting target column to integer datatype
-             params = {
-                 "data" : self.data,
-                 "target_columns" : [self.data_target_column],
-                 "target_datatype" : ["integer"],
-                 "accumulate" : self._extract_list(self.data.columns, [self.data_target_column])
-             }
-             self.data = ConvertTo(**params).result
+
              self._display_msg(msg="\nUpdated dataset after performing target column transformation :",
                                data=self.data,
                                progress_bar=self.progress_bar)
@@ -715,17 +783,17 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
                                      accumulate=accumulate_cols).result
          # Displaying scaled dataset
          self._display_msg(msg="\nUpdated dataset after performing scaling on Lasso selected features :",
-                           data=lasso_df,
-                           progress_bar=self.progress_bar)
+                           data=lasso_df,
+                           progress_bar=self.progress_bar)

          # Uploading lasso dataset to table for further use
-         table_name = UtilFuncs._generate_temp_table_name(prefix="lasso_new_test",
+         table_name = UtilFuncs._generate_temp_table_name(prefix="lasso_test",
                                                           table_type = TeradataConstants.TERADATA_TABLE)
          # If configure.temp_object_type="VT", _generate_temp_table_name() returns the
          # table name in fully qualified format.
          table_name = UtilFuncs._extract_table_name(table_name)
          # Storing table name mapping for lasso dataset
-         self.table_name_mapping[self.data_node_id]["lasso_new_test"] = table_name
+         self.table_name_mapping[self.data_node_id]["lasso_test"] = table_name
          # In the case of the VT option, the table was being persisted, so the VT condition is being checked.
          is_temporary = configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE
          copy_to_sql(df = lasso_df, table_name= table_name, if_exists="replace", temporary=is_temporary)
@@ -760,17 +828,17 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
                                      accumulate=accumulate_cols).result
          # Displaying scaled dataset
          self._display_msg(msg="\nUpdated dataset after performing scaling on RFE selected features :",
-                           data=rfe_df,
-                           progress_bar=self.progress_bar)
+                           data=rfe_df,
+                           progress_bar=self.progress_bar)

          # Uploading rfe dataset to table for further use
-         table_name = UtilFuncs._generate_temp_table_name(prefix="rfe_new_test",
+         table_name = UtilFuncs._generate_temp_table_name(prefix="rfe_test",
                                                           table_type = TeradataConstants.TERADATA_TABLE)
          # If configure.temp_object_type="VT", _generate_temp_table_name() returns the
          # table name in fully qualified format.
          table_name = UtilFuncs._extract_table_name(table_name)
          # Storing table name mapping for rfe dataset
-         self.table_name_mapping[self.data_node_id]["rfe_new_test"] = table_name
+         self.table_name_mapping[self.data_node_id]["rfe_test"] = table_name
          # In the case of the VT option, the table was being persisted, so the VT condition is being checked.
          is_temporary = configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE
          copy_to_sql(df = rfe_df, table_name= table_name, if_exists="replace", temporary=is_temporary)
@@ -783,19 +851,19 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
          # Extracting fit object and column details for performing feature scaling
          pca_scale_fit_obj = self.data_transformation_params.get("pca_scale_fit_obj", None)
          pca_scale_col = self.data_transformation_params.get("pca_scale_col", None)
-         # Extracting accumulate columns
-         accumulate_cols = self._extract_list(self.data.columns, pca_scale_col)
-
+
          pca_scaled_df = self.data
          if pca_scale_fit_obj is not None:
+             # Extracting accumulate columns
+             accumulate_cols = self._extract_list(self.data.columns, pca_scale_col)
              # Scaling on pca dataset
              pca_scaled_df = ScaleTransform(data=self.data,
                                             object=pca_scale_fit_obj,
                                             accumulate=accumulate_cols).result
              # Displaying scaled dataset
              self._display_msg(msg="\nUpdated dataset after performing scaling for PCA feature selection :",
-                               data=pca_scaled_df,
-                               progress_bar=self.progress_bar)
+                               data=pca_scaled_df,
+                               progress_bar=self.progress_bar)

          # Convert to pandas dataframe for applying pca
          pca_scaled_pd = pca_scaled_df.to_pandas().reset_index()
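The pandas round-trip exists because the PCA projection itself is applied client-side. A sketch of the step that presumably follows (assuming a fitted sklearn.decomposition.PCA instance is stored in the transformation params; the key name "pca_fit_obj" is hypothetical and not shown in this diff):

    import pandas as pd

    # Assumption: pca_fit_obj is a fitted sklearn.decomposition.PCA instance.
    pca_obj = self.data_transformation_params.get("pca_fit_obj")
    pca_df = pd.DataFrame(pca_obj.transform(pca_scaled_pd[pca_scale_col]),
                          columns=["col_{}".format(i) for i in range(pca_obj.n_components_)])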
@@ -832,14 +900,47 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
                            progress_bar=self.progress_bar)

          # Uploading pca dataset to table for further use
-         table_name = UtilFuncs._generate_temp_table_name(prefix="pca_new_test",
+         table_name = UtilFuncs._generate_temp_table_name(prefix="pca_test",
                                                           table_type = TeradataConstants.TERADATA_TABLE)
          # If configure.temp_object_type="VT", _generate_temp_table_name() returns the
          # table name in fully qualified format.
          table_name = UtilFuncs._extract_table_name(table_name)
          # Storing table name mapping for pca dataset
-         self.table_name_mapping[self.data_node_id]["pca_new_test"] = table_name
+         self.table_name_mapping[self.data_node_id]["pca_test"] = table_name
          # In the case of the VT option, the table was being persisted, so the VT condition is being checked.
          is_temporary = configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE
          copy_to_sql(df = pca_df, table_name=table_name, if_exists="replace", temporary=is_temporary)
+
+     def _feature_selection_non_pca_transformation(self):
+         """
+         DESCRIPTION:
+             Function performs feature scaling on raw data for non-PCA clustering models.
+         """
+         self._display_msg(msg="\nRunning Non-PCA feature selection transformation for clustering...",
+                           show_data=True,
+                           progress_bar=self.progress_bar)

+         # Extracting fit object and columns for scaling
+         non_pca_scale_fit_obj = self.data_transformation_params.get("non_pca_scale_fit_obj", None)
+         non_pca_scale_col = self.data_transformation_params.get("non_pca_scale_col", None)
+
+         if non_pca_scale_fit_obj is not None and non_pca_scale_col is not None:
+             accumulate_cols = self._extract_list(self.data.columns, non_pca_scale_col)
+
+             # Scaling dataset
+             scaled_df = ScaleTransform(data=self.data,
+                                        object=non_pca_scale_fit_obj,
+                                        accumulate=accumulate_cols).result
+
+             # Displaying scaled dataset
+             self._display_msg(msg="\nUpdated dataset after performing Non-PCA scaling for clustering:",
+                               data=scaled_df,
+                               progress_bar=self.progress_bar)
+
+             # Uploading non_pca dataset to SQL
+             table_name = UtilFuncs._generate_temp_table_name(prefix="non_pca_test",
+                                                              table_type=TeradataConstants.TERADATA_TABLE)
+             self.table_name_mapping[self.data_node_id]["non_pca_test"] = table_name
+             copy_to_sql(df=scaled_df, table_name=table_name, if_exists="replace")
+         else:
+             print(" Missing non_pca_scale_fit_obj or non_pca_scale_col in data transformation params.")
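Downstream consumers re-open these temporary tables through the returned mapping. A hedged usage sketch (the key comes from the code above; node_id would be the input DataFrame's _nodeid):

    from teradataml import DataFrame

    # table_name_mapping is the dict returned by data_transformation().
    tbl_name = table_name_mapping[node_id]["non_pca_test"]
    scaled = DataFrame(tbl_name)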