teradataml 20.0.0.4__py3-none-any.whl → 20.0.0.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +182 -13
- teradataml/__init__.py +2 -1
- teradataml/_version.py +2 -2
- teradataml/analytics/analytic_function_executor.py +8 -13
- teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
- teradataml/analytics/sqle/__init__.py +16 -1
- teradataml/analytics/utils.py +60 -1
- teradataml/automl/__init__.py +290 -106
- teradataml/automl/autodataprep/__init__.py +471 -0
- teradataml/automl/data_preparation.py +29 -10
- teradataml/automl/data_transformation.py +11 -0
- teradataml/automl/feature_engineering.py +64 -4
- teradataml/automl/feature_exploration.py +639 -25
- teradataml/automl/model_training.py +1 -1
- teradataml/clients/auth_client.py +12 -8
- teradataml/clients/keycloak_client.py +165 -0
- teradataml/common/constants.py +71 -26
- teradataml/common/exceptions.py +32 -0
- teradataml/common/messagecodes.py +28 -0
- teradataml/common/messages.py +13 -4
- teradataml/common/sqlbundle.py +3 -2
- teradataml/common/utils.py +345 -45
- teradataml/context/context.py +259 -93
- teradataml/data/apriori_example.json +22 -0
- teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
- teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
- teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
- teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
- teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
- teradataml/data/jsons/byom/onnxembeddings.json +1 -0
- teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
- teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
- teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
- teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
- teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
- teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
- teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
- teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
- teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +2 -2
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +1 -1
- teradataml/data/ner_dict.csv +8 -0
- teradataml/data/ner_input_eng.csv +7 -0
- teradataml/data/ner_rule.csv +5 -0
- teradataml/data/pattern_matching_data.csv +11 -0
- teradataml/data/pos_input.csv +40 -0
- teradataml/data/sdk/modelops/modelops_spec.json +101737 -0
- teradataml/data/tdnerextractor_example.json +14 -0
- teradataml/data/teradataml_example.json +21 -1
- teradataml/data/textmorph_example.json +5 -0
- teradataml/data/to_num_data.csv +4 -0
- teradataml/data/tochar_data.csv +5 -0
- teradataml/data/trans_dense.csv +16 -0
- teradataml/data/trans_sparse.csv +55 -0
- teradataml/data/url_data.csv +10 -9
- teradataml/dataframe/copy_to.py +38 -27
- teradataml/dataframe/data_transfer.py +61 -45
- teradataml/dataframe/dataframe.py +1110 -132
- teradataml/dataframe/dataframe_utils.py +73 -27
- teradataml/dataframe/functions.py +1070 -9
- teradataml/dataframe/sql.py +750 -959
- teradataml/dbutils/dbutils.py +33 -13
- teradataml/dbutils/filemgr.py +14 -10
- teradataml/hyperparameter_tuner/utils.py +4 -2
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/_base.py +12 -157
- teradataml/options/configure.py +24 -9
- teradataml/scriptmgmt/UserEnv.py +317 -39
- teradataml/scriptmgmt/lls_utils.py +456 -135
- teradataml/sdk/README.md +79 -0
- teradataml/sdk/__init__.py +4 -0
- teradataml/sdk/_auth_modes.py +422 -0
- teradataml/sdk/_func_params.py +487 -0
- teradataml/sdk/_json_parser.py +453 -0
- teradataml/sdk/_openapi_spec_constants.py +249 -0
- teradataml/sdk/_utils.py +236 -0
- teradataml/sdk/api_client.py +897 -0
- teradataml/sdk/constants.py +62 -0
- teradataml/sdk/modelops/__init__.py +98 -0
- teradataml/sdk/modelops/_client.py +406 -0
- teradataml/sdk/modelops/_constants.py +304 -0
- teradataml/sdk/modelops/models.py +2308 -0
- teradataml/sdk/spinner.py +107 -0
- teradataml/store/__init__.py +1 -1
- teradataml/table_operators/Apply.py +16 -1
- teradataml/table_operators/Script.py +20 -1
- teradataml/table_operators/query_generator.py +4 -21
- teradataml/table_operators/table_operator_util.py +58 -9
- teradataml/utils/dtypes.py +4 -2
- teradataml/utils/internal_buffer.py +22 -2
- teradataml/utils/utils.py +0 -1
- teradataml/utils/validators.py +318 -58
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/METADATA +188 -14
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/RECORD +131 -84
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/zip-safe +0 -0
|
@@ -137,6 +137,9 @@ class _FeatureEngineering:
|
|
|
137
137
|
self.persist = kwargs.get('persist', False)
|
|
138
138
|
self.volatile = kwargs.get('volatile', False) or (configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE and self.persist is False)
|
|
139
139
|
|
|
140
|
+
self.data_mapping = {}
|
|
141
|
+
self.progress_prefix = kwargs.get('progress_prefix', None)
|
|
142
|
+
self.aml_phases = kwargs.get('automl_phases', None)
|
|
140
143
|
|
|
141
144
|
# Method for doing feature engineering on data -> adding id, removing futile col, imputation, encoding(one hot)
|
|
142
145
|
def feature_engineering(self,
|
|
@@ -162,7 +165,7 @@ class _FeatureEngineering:
|
|
|
162
165
|
second element represents list of columns which are not participating in outlier transformation.
|
|
163
166
|
"""
|
|
164
167
|
# Assigning number of base jobs for progress bar.
|
|
165
|
-
base_jobs =
|
|
168
|
+
base_jobs = 12 if auto else 17
|
|
166
169
|
|
|
167
170
|
# Updating model list based on distinct value of target column for classification type
|
|
168
171
|
if self.is_classification_type():
|
|
@@ -172,10 +175,14 @@ class _FeatureEngineering:
|
|
|
172
175
|
|
|
173
176
|
# Updating number of jobs for progress bar based on number of models.
|
|
174
177
|
jobs = base_jobs + len(self.model_list)
|
|
175
|
-
self.progress_bar = _ProgressBar(jobs=jobs,
|
|
178
|
+
self.progress_bar = _ProgressBar(jobs=jobs,
|
|
179
|
+
verbose=2,
|
|
180
|
+
prefix=self.progress_prefix)
|
|
176
181
|
|
|
177
182
|
self._display_heading(phase=1,
|
|
178
|
-
progress_bar=self.progress_bar
|
|
183
|
+
progress_bar=self.progress_bar,
|
|
184
|
+
automl_phases=self.aml_phases)
|
|
185
|
+
|
|
179
186
|
self._display_msg(msg='Feature Engineering started ...',
|
|
180
187
|
progress_bar=self.progress_bar)
|
|
181
188
|
|
|
@@ -239,7 +246,7 @@ class _FeatureEngineering:
|
|
|
239
246
|
self._non_linear_transformation()
|
|
240
247
|
self.progress_bar.update()
|
|
241
248
|
|
|
242
|
-
return self.data, self.excluded_cols, self.target_label, self.data_transform_dict
|
|
249
|
+
return self.data, self.excluded_cols, self.target_label, self.data_transform_dict, self.data_mapping
|
|
243
250
|
|
|
244
251
|
def _extract_list(self,
|
|
245
252
|
list1,
|
|
@@ -369,6 +376,9 @@ class _FeatureEngineering:
|
|
|
369
376
|
|
|
370
377
|
# Extracting Futile columns
|
|
371
378
|
f_cols = [row[0] for row in gfc_out.result.itertuples()]
|
|
379
|
+
|
|
380
|
+
self.data_mapping['categorical_summary'] = obj.result._table_name
|
|
381
|
+
self.data_mapping['futile_columns'] = gfc_out.result._table_name
|
|
372
382
|
|
|
373
383
|
if len(f_cols) == 0:
|
|
374
384
|
self._display_msg(inline_msg="Analysis indicates all categorical columns are significant. No action Needed.",
|
|
@@ -378,6 +388,15 @@ class _FeatureEngineering:
|
|
|
378
388
|
self.data = self.data.drop(f_cols, axis=1)
|
|
379
389
|
# Storing futile column list in data transform dictionary
|
|
380
390
|
self.data_transform_dict['futile_columns'] = f_cols
|
|
391
|
+
|
|
392
|
+
if self.persist:
|
|
393
|
+
table_name = UtilFuncs._generate_temp_table_name(table_type=TeradataConstants.TERADATA_TABLE,
|
|
394
|
+
gc_on_quit=False)
|
|
395
|
+
self.data.to_sql(table_name)
|
|
396
|
+
else:
|
|
397
|
+
self.data.materialize()
|
|
398
|
+
|
|
399
|
+
self.data_mapping['data_without_futile_columns'] = self.data._table_name
|
|
381
400
|
self._display_msg(msg='Removing Futile columns:',
|
|
382
401
|
col_lst=f_cols,
|
|
383
402
|
progress_bar=self.progress_bar)
|
|
@@ -553,6 +572,13 @@ class _FeatureEngineering:
|
|
|
553
572
|
# Storing date column list in data transform dictionary
|
|
554
573
|
self.data_transform_dict['date_columns'] = self.date_column_list
|
|
555
574
|
self._handle_date_columns_helper()
|
|
575
|
+
if self.persist:
|
|
576
|
+
table_name = UtilFuncs._generate_temp_table_name(table_type=TeradataConstants.TERADATA_TABLE,
|
|
577
|
+
gc_on_quit=False)
|
|
578
|
+
self.data.to_sql(table_name)
|
|
579
|
+
else:
|
|
580
|
+
self.data.materialize()
|
|
581
|
+
self.data_mapping['data_after_date_handling'] = self.data._table_name
|
|
556
582
|
|
|
557
583
|
end_time = time.time()
|
|
558
584
|
self._display_msg(msg="Total time to handle date features: {:.2f} sec\n".format(end_time-start_time),
|
|
@@ -766,6 +792,9 @@ class _FeatureEngineering:
|
|
|
766
792
|
persist=self.persist)
|
|
767
793
|
|
|
768
794
|
self.data = sm.result
|
|
795
|
+
self.data_mapping['fit_simpleimpute_output'] = fit_obj.output_data._table_name
|
|
796
|
+
self.data_mapping['fit_simpleimpute_result'] = fit_obj.output._table_name
|
|
797
|
+
self.data_mapping['data_without_missing_values'] = self.data._table_name
|
|
769
798
|
self._display_msg(msg="Sample of dataset after Imputation:",
|
|
770
799
|
data=self.data,
|
|
771
800
|
progress_bar=self.progress_bar)
|
|
@@ -878,6 +907,11 @@ class _FeatureEngineering:
|
|
|
878
907
|
transform_param["persist"] = False
|
|
879
908
|
# Updating dataset with transform result
|
|
880
909
|
self.data = SimpleImputeTransform(**transform_param).result
|
|
910
|
+
|
|
911
|
+
self.data_mapping['fit_simpleimpute_output'] = fit_obj.output_data._table_name
|
|
912
|
+
self.data_mapping['fit_simpleimpute_result'] = fit_obj.output._table_name
|
|
913
|
+
self.data_mapping['data_without_missing_values'] = self.data._table_name
|
|
914
|
+
|
|
881
915
|
if not volatile and not persist:
|
|
882
916
|
# Adding transformed data containing table to garbage collector
|
|
883
917
|
GarbageCollector._add_to_garbagecollector(self.data._table_name)
|
|
@@ -984,6 +1018,10 @@ class _FeatureEngineering:
|
|
|
984
1018
|
if not volatile and not persist:
|
|
985
1019
|
# Adding transformed data containing table to garbage collector
|
|
986
1020
|
GarbageCollector._add_to_garbagecollector(self.data._table_name)
|
|
1021
|
+
|
|
1022
|
+
self.data_mapping['fit_eql_width'] = eql_bin_code_fit.output._table_name
|
|
1023
|
+
self.data_mapping['eql_width_bincoded_data'] = self.data._table_name
|
|
1024
|
+
|
|
987
1025
|
self._display_msg(msg="\nUpdated dataset sample after performing Equal-Width binning :-",
|
|
988
1026
|
data=self.data,
|
|
989
1027
|
progress_bar=self.progress_bar)
|
|
@@ -1032,6 +1070,8 @@ class _FeatureEngineering:
|
|
|
1032
1070
|
var_transform_params["volatile"] = True
|
|
1033
1071
|
var_transform_params["persist"] = False
|
|
1034
1072
|
self.data = BincodeTransform(**var_transform_params).result
|
|
1073
|
+
self.data_mapping['fit_var_width'] = var_bin_code_fit.output._table_name
|
|
1074
|
+
self.data_mapping['var_width_bincoded_data'] = self.data._table_name
|
|
1035
1075
|
if not volatile and not persist:
|
|
1036
1076
|
# Adding transformed data containing table to garbage collector
|
|
1037
1077
|
GarbageCollector._add_to_garbagecollector(self.data._table_name)
|
|
@@ -1153,6 +1193,7 @@ class _FeatureEngineering:
|
|
|
1153
1193
|
if not volatile and not persist:
|
|
1154
1194
|
# Adding transformed data containing table to garbage collector
|
|
1155
1195
|
GarbageCollector._add_to_garbagecollector(transform_output._table_name)
|
|
1196
|
+
self.data_mapping['string_manipulated_data'] = transform_output._table_name
|
|
1156
1197
|
return transform_output
|
|
1157
1198
|
|
|
1158
1199
|
def _one_hot_encoding(self,
|
|
@@ -1222,6 +1263,9 @@ class _FeatureEngineering:
|
|
|
1222
1263
|
# Adding transformed data containing table to garbage collector
|
|
1223
1264
|
GarbageCollector._add_to_garbagecollector(transform_output._table_name)
|
|
1224
1265
|
self.data = transform_output.drop(drop_lst, axis=1)
|
|
1266
|
+
self.data.materialize()
|
|
1267
|
+
self.data_mapping['one_hot_encoded_data'] = transform_output._table_name
|
|
1268
|
+
self.data_mapping['fit_ohe_result'] = fit_obj.result._table_name
|
|
1225
1269
|
|
|
1226
1270
|
def _ordinal_encoding(self,
|
|
1227
1271
|
ordinal_columns):
|
|
@@ -1279,6 +1323,10 @@ class _FeatureEngineering:
|
|
|
1279
1323
|
# Adding transformed data containing table to garbage collector
|
|
1280
1324
|
GarbageCollector._add_to_garbagecollector(self.data._table_name)
|
|
1281
1325
|
|
|
1326
|
+
self.data_mapping['fit_ordinal_output'] = ord_fit_obj.output_data._table_name
|
|
1327
|
+
self.data_mapping['fit_ordinal_result'] = ord_fit_obj.result._table_name
|
|
1328
|
+
self.data_mapping['ordinal_encoded_data'] = self.data._table_name
|
|
1329
|
+
|
|
1282
1330
|
if len(ordinal_columns) == 1 and ordinal_columns[0] == self.target_column:
|
|
1283
1331
|
self.target_label = ord_fit_obj
|
|
1284
1332
|
|
|
@@ -1325,6 +1373,7 @@ class _FeatureEngineering:
|
|
|
1325
1373
|
"encoder_method" : encoder_method,
|
|
1326
1374
|
"target_columns" : col,
|
|
1327
1375
|
"response_column" : response_column,
|
|
1376
|
+
"default_values": -1,
|
|
1328
1377
|
"volatile" : volatile,
|
|
1329
1378
|
"persist" : persist
|
|
1330
1379
|
}
|
|
@@ -1358,6 +1407,9 @@ class _FeatureEngineering:
|
|
|
1358
1407
|
if not volatile and not persist:
|
|
1359
1408
|
# Adding transformed data containing table to garbage collector
|
|
1360
1409
|
GarbageCollector._add_to_garbagecollector(self.data._table_name)
|
|
1410
|
+
self.data_mapping[f'fit_{col}_target_output'] = tar_fit_obj.output_data._table_name
|
|
1411
|
+
self.data_mapping[f'fit_{col}_target_result'] = tar_fit_obj.result._table_name
|
|
1412
|
+
self.data_mapping[f'{col}_target_encoded_data'] = self.data._table_name
|
|
1361
1413
|
|
|
1362
1414
|
def _encoding_categorical_columns(self):
|
|
1363
1415
|
"""
|
|
@@ -1590,6 +1642,9 @@ class _FeatureEngineering:
|
|
|
1590
1642
|
if not volatile and not persist:
|
|
1591
1643
|
# Adding transformed data containing table to garbage collector
|
|
1592
1644
|
GarbageCollector._add_to_garbagecollector(self.data._table_name)
|
|
1645
|
+
|
|
1646
|
+
self.data_mapping['fit_numerical_result'] = num_fit_obj.result._table_name
|
|
1647
|
+
self.data_mapping['numerical_transformed_data'] = self.data._table_name
|
|
1593
1648
|
self._display_msg(msg="Updated dataset sample after applying numerical transformation:",
|
|
1594
1649
|
data=self.data,
|
|
1595
1650
|
progress_bar=self.progress_bar)
|
|
@@ -1630,6 +1685,7 @@ class _FeatureEngineering:
|
|
|
1630
1685
|
if apply_method in (["sininv","sigmoid"]):
|
|
1631
1686
|
# Applying numapply transformation
|
|
1632
1687
|
self.data = self._numapply_transformation(col,transform_val)
|
|
1688
|
+
self.data_mapping[f'{apply_method}_transformed_data'] = self.data._table_name
|
|
1633
1689
|
self._display_msg(msg="Updated dataset sample after applying numapply transformation:",
|
|
1634
1690
|
data=self.data,
|
|
1635
1691
|
progress_bar=self.progress_bar)
|
|
@@ -1734,6 +1790,10 @@ class _FeatureEngineering:
|
|
|
1734
1790
|
transform_params["persist"] = False
|
|
1735
1791
|
self.data = NonLinearCombineTransform(**transform_params).result
|
|
1736
1792
|
|
|
1793
|
+
self.data_mapping[f'fit_nonlinear_{comb}_output'] = fit_obj.output_data._table_name
|
|
1794
|
+
self.data_mapping[f'fit_nonlinear_{comb}_result'] = fit_obj.result._table_name
|
|
1795
|
+
self.data_mapping['non_linear_transformed_data'] = self.data._table_name
|
|
1796
|
+
|
|
1737
1797
|
if not volatile and not persist:
|
|
1738
1798
|
# Adding transformed data containing table to garbage collector
|
|
1739
1799
|
GarbageCollector._add_to_garbagecollector(self.data._table_name)
|