teradataml 20.0.0.4__py3-none-any.whl → 20.0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic. Click here for more details.

Files changed (107) hide show
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +86 -13
  3. teradataml/__init__.py +2 -1
  4. teradataml/_version.py +2 -2
  5. teradataml/analytics/analytic_function_executor.py +7 -12
  6. teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
  7. teradataml/analytics/sqle/__init__.py +16 -1
  8. teradataml/analytics/utils.py +15 -1
  9. teradataml/automl/__init__.py +290 -106
  10. teradataml/automl/autodataprep/__init__.py +471 -0
  11. teradataml/automl/data_preparation.py +29 -10
  12. teradataml/automl/data_transformation.py +11 -0
  13. teradataml/automl/feature_engineering.py +64 -4
  14. teradataml/automl/feature_exploration.py +639 -25
  15. teradataml/automl/model_training.py +1 -1
  16. teradataml/clients/auth_client.py +2 -2
  17. teradataml/common/constants.py +61 -26
  18. teradataml/common/messagecodes.py +2 -1
  19. teradataml/common/messages.py +5 -4
  20. teradataml/common/utils.py +255 -37
  21. teradataml/context/context.py +225 -87
  22. teradataml/data/apriori_example.json +22 -0
  23. teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
  24. teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
  25. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
  26. teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
  27. teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
  28. teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
  29. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
  30. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
  31. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
  32. teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
  33. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
  34. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
  35. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
  36. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
  37. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
  38. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
  39. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
  40. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  41. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
  42. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
  43. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
  44. teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
  45. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  46. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
  47. teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
  48. teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
  49. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  50. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
  51. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
  52. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
  53. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
  54. teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
  55. teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
  56. teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
  57. teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
  58. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
  59. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
  60. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
  61. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
  62. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
  63. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
  64. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
  65. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
  66. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
  67. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
  68. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
  69. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
  70. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +2 -2
  71. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +1 -1
  72. teradataml/data/ner_dict.csv +8 -0
  73. teradataml/data/ner_input_eng.csv +7 -0
  74. teradataml/data/ner_rule.csv +5 -0
  75. teradataml/data/pos_input.csv +40 -0
  76. teradataml/data/tdnerextractor_example.json +14 -0
  77. teradataml/data/teradataml_example.json +13 -0
  78. teradataml/data/textmorph_example.json +5 -0
  79. teradataml/data/to_num_data.csv +4 -0
  80. teradataml/data/tochar_data.csv +5 -0
  81. teradataml/data/trans_dense.csv +16 -0
  82. teradataml/data/trans_sparse.csv +55 -0
  83. teradataml/dataframe/copy_to.py +37 -26
  84. teradataml/dataframe/data_transfer.py +61 -45
  85. teradataml/dataframe/dataframe.py +130 -50
  86. teradataml/dataframe/dataframe_utils.py +15 -2
  87. teradataml/dataframe/functions.py +109 -9
  88. teradataml/dataframe/sql.py +328 -76
  89. teradataml/dbutils/dbutils.py +33 -13
  90. teradataml/dbutils/filemgr.py +14 -10
  91. teradataml/lib/aed_0_1.dll +0 -0
  92. teradataml/opensource/_base.py +6 -157
  93. teradataml/options/configure.py +4 -5
  94. teradataml/scriptmgmt/UserEnv.py +305 -38
  95. teradataml/scriptmgmt/lls_utils.py +376 -130
  96. teradataml/store/__init__.py +1 -1
  97. teradataml/table_operators/Apply.py +16 -1
  98. teradataml/table_operators/Script.py +20 -1
  99. teradataml/table_operators/table_operator_util.py +58 -9
  100. teradataml/utils/dtypes.py +2 -1
  101. teradataml/utils/internal_buffer.py +22 -2
  102. teradataml/utils/validators.py +313 -57
  103. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.5.dist-info}/METADATA +89 -14
  104. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.5.dist-info}/RECORD +107 -77
  105. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.5.dist-info}/WHEEL +0 -0
  106. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.5.dist-info}/top_level.txt +0 -0
  107. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.5.dist-info}/zip-safe +0 -0
@@ -137,6 +137,9 @@ class _FeatureEngineering:
137
137
  self.persist = kwargs.get('persist', False)
138
138
  self.volatile = kwargs.get('volatile', False) or (configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE and self.persist is False)
139
139
 
140
+ self.data_mapping = {}
141
+ self.progress_prefix = kwargs.get('progress_prefix', None)
142
+ self.aml_phases = kwargs.get('automl_phases', None)
140
143
 
141
144
  # Method for doing feature engineering on data -> adding id, removing futile col, imputation, encoding(one hot)
142
145
  def feature_engineering(self,
@@ -162,7 +165,7 @@ class _FeatureEngineering:
162
165
  second element represents list of columns which are not participating in outlier transformation.
163
166
  """
164
167
  # Assigning number of base jobs for progress bar.
165
- base_jobs = 13 if auto else 17
168
+ base_jobs = 12 if auto else 17
166
169
 
167
170
  # Updating model list based on distinct value of target column for classification type
168
171
  if self.is_classification_type():
@@ -172,10 +175,14 @@ class _FeatureEngineering:
172
175
 
173
176
  # Updating number of jobs for progress bar based on number of models.
174
177
  jobs = base_jobs + len(self.model_list)
175
- self.progress_bar = _ProgressBar(jobs=jobs, verbose=2, prefix='Automl Running:')
178
+ self.progress_bar = _ProgressBar(jobs=jobs,
179
+ verbose=2,
180
+ prefix=self.progress_prefix)
176
181
 
177
182
  self._display_heading(phase=1,
178
- progress_bar=self.progress_bar)
183
+ progress_bar=self.progress_bar,
184
+ automl_phases=self.aml_phases)
185
+
179
186
  self._display_msg(msg='Feature Engineering started ...',
180
187
  progress_bar=self.progress_bar)
181
188
 
@@ -239,7 +246,7 @@ class _FeatureEngineering:
239
246
  self._non_linear_transformation()
240
247
  self.progress_bar.update()
241
248
 
242
- return self.data, self.excluded_cols, self.target_label, self.data_transform_dict
249
+ return self.data, self.excluded_cols, self.target_label, self.data_transform_dict, self.data_mapping
243
250
 
244
251
  def _extract_list(self,
245
252
  list1,
@@ -369,6 +376,9 @@ class _FeatureEngineering:
369
376
 
370
377
  # Extracting Futile columns
371
378
  f_cols = [row[0] for row in gfc_out.result.itertuples()]
379
+
380
+ self.data_mapping['categorical_summary'] = obj.result._table_name
381
+ self.data_mapping['futile_columns'] = gfc_out.result._table_name
372
382
 
373
383
  if len(f_cols) == 0:
374
384
  self._display_msg(inline_msg="Analysis indicates all categorical columns are significant. No action Needed.",
@@ -378,6 +388,15 @@ class _FeatureEngineering:
378
388
  self.data = self.data.drop(f_cols, axis=1)
379
389
  # Storing futile column list in data transform dictionary
380
390
  self.data_transform_dict['futile_columns'] = f_cols
391
+
392
+ if self.persist:
393
+ table_name = UtilFuncs._generate_temp_table_name(table_type=TeradataConstants.TERADATA_TABLE,
394
+ gc_on_quit=False)
395
+ self.data.to_sql(table_name)
396
+ else:
397
+ self.data.materialize()
398
+
399
+ self.data_mapping['data_without_futile_columns'] = self.data._table_name
381
400
  self._display_msg(msg='Removing Futile columns:',
382
401
  col_lst=f_cols,
383
402
  progress_bar=self.progress_bar)
@@ -553,6 +572,13 @@ class _FeatureEngineering:
553
572
  # Storing date column list in data transform dictionary
554
573
  self.data_transform_dict['date_columns'] = self.date_column_list
555
574
  self._handle_date_columns_helper()
575
+ if self.persist:
576
+ table_name = UtilFuncs._generate_temp_table_name(table_type=TeradataConstants.TERADATA_TABLE,
577
+ gc_on_quit=False)
578
+ self.data.to_sql(table_name)
579
+ else:
580
+ self.data.materialize()
581
+ self.data_mapping['data_after_date_handling'] = self.data._table_name
556
582
 
557
583
  end_time = time.time()
558
584
  self._display_msg(msg="Total time to handle date features: {:.2f} sec\n".format(end_time-start_time),
@@ -766,6 +792,9 @@ class _FeatureEngineering:
766
792
  persist=self.persist)
767
793
 
768
794
  self.data = sm.result
795
+ self.data_mapping['fit_simpleimpute_output'] = fit_obj.output_data._table_name
796
+ self.data_mapping['fit_simpleimpute_result'] = fit_obj.output._table_name
797
+ self.data_mapping['data_without_missing_values'] = self.data._table_name
769
798
  self._display_msg(msg="Sample of dataset after Imputation:",
770
799
  data=self.data,
771
800
  progress_bar=self.progress_bar)
@@ -878,6 +907,11 @@ class _FeatureEngineering:
878
907
  transform_param["persist"] = False
879
908
  # Updating dataset with transform result
880
909
  self.data = SimpleImputeTransform(**transform_param).result
910
+
911
+ self.data_mapping['fit_simpleimpute_output'] = fit_obj.output_data._table_name
912
+ self.data_mapping['fit_simpleimpute_result'] = fit_obj.output._table_name
913
+ self.data_mapping['data_without_missing_values'] = self.data._table_name
914
+
881
915
  if not volatile and not persist:
882
916
  # Adding transformed data containing table to garbage collector
883
917
  GarbageCollector._add_to_garbagecollector(self.data._table_name)
@@ -984,6 +1018,10 @@ class _FeatureEngineering:
984
1018
  if not volatile and not persist:
985
1019
  # Adding transformed data containing table to garbage collector
986
1020
  GarbageCollector._add_to_garbagecollector(self.data._table_name)
1021
+
1022
+ self.data_mapping['fit_eql_width'] = eql_bin_code_fit.output._table_name
1023
+ self.data_mapping['eql_width_bincoded_data'] = self.data._table_name
1024
+
987
1025
  self._display_msg(msg="\nUpdated dataset sample after performing Equal-Width binning :-",
988
1026
  data=self.data,
989
1027
  progress_bar=self.progress_bar)
@@ -1032,6 +1070,8 @@ class _FeatureEngineering:
1032
1070
  var_transform_params["volatile"] = True
1033
1071
  var_transform_params["persist"] = False
1034
1072
  self.data = BincodeTransform(**var_transform_params).result
1073
+ self.data_mapping['fit_var_width'] = var_bin_code_fit.output._table_name
1074
+ self.data_mapping['var_width_bincoded_data'] = self.data._table_name
1035
1075
  if not volatile and not persist:
1036
1076
  # Adding transformed data containing table to garbage collector
1037
1077
  GarbageCollector._add_to_garbagecollector(self.data._table_name)
@@ -1153,6 +1193,7 @@ class _FeatureEngineering:
1153
1193
  if not volatile and not persist:
1154
1194
  # Adding transformed data containing table to garbage collector
1155
1195
  GarbageCollector._add_to_garbagecollector(transform_output._table_name)
1196
+ self.data_mapping['string_manipulated_data'] = transform_output._table_name
1156
1197
  return transform_output
1157
1198
 
1158
1199
  def _one_hot_encoding(self,
@@ -1222,6 +1263,9 @@ class _FeatureEngineering:
1222
1263
  # Adding transformed data containing table to garbage collector
1223
1264
  GarbageCollector._add_to_garbagecollector(transform_output._table_name)
1224
1265
  self.data = transform_output.drop(drop_lst, axis=1)
1266
+ self.data.materialize()
1267
+ self.data_mapping['one_hot_encoded_data'] = transform_output._table_name
1268
+ self.data_mapping['fit_ohe_result'] = fit_obj.result._table_name
1225
1269
 
1226
1270
  def _ordinal_encoding(self,
1227
1271
  ordinal_columns):
@@ -1279,6 +1323,10 @@ class _FeatureEngineering:
1279
1323
  # Adding transformed data containing table to garbage collector
1280
1324
  GarbageCollector._add_to_garbagecollector(self.data._table_name)
1281
1325
 
1326
+ self.data_mapping['fit_ordinal_output'] = ord_fit_obj.output_data._table_name
1327
+ self.data_mapping['fit_ordinal_result'] = ord_fit_obj.result._table_name
1328
+ self.data_mapping['ordinal_encoded_data'] = self.data._table_name
1329
+
1282
1330
  if len(ordinal_columns) == 1 and ordinal_columns[0] == self.target_column:
1283
1331
  self.target_label = ord_fit_obj
1284
1332
 
@@ -1325,6 +1373,7 @@ class _FeatureEngineering:
1325
1373
  "encoder_method" : encoder_method,
1326
1374
  "target_columns" : col,
1327
1375
  "response_column" : response_column,
1376
+ "default_values": -1,
1328
1377
  "volatile" : volatile,
1329
1378
  "persist" : persist
1330
1379
  }
@@ -1358,6 +1407,9 @@ class _FeatureEngineering:
1358
1407
  if not volatile and not persist:
1359
1408
  # Adding transformed data containing table to garbage collector
1360
1409
  GarbageCollector._add_to_garbagecollector(self.data._table_name)
1410
+ self.data_mapping[f'fit_{col}_target_output'] = tar_fit_obj.output_data._table_name
1411
+ self.data_mapping[f'fit_{col}_target_result'] = tar_fit_obj.result._table_name
1412
+ self.data_mapping[f'{col}_target_encoded_data'] = self.data._table_name
1361
1413
 
1362
1414
  def _encoding_categorical_columns(self):
1363
1415
  """
@@ -1590,6 +1642,9 @@ class _FeatureEngineering:
1590
1642
  if not volatile and not persist:
1591
1643
  # Adding transformed data containing table to garbage collector
1592
1644
  GarbageCollector._add_to_garbagecollector(self.data._table_name)
1645
+
1646
+ self.data_mapping['fit_numerical_result'] = num_fit_obj.result._table_name
1647
+ self.data_mapping['numerical_transformed_data'] = self.data._table_name
1593
1648
  self._display_msg(msg="Updated dataset sample after applying numerical transformation:",
1594
1649
  data=self.data,
1595
1650
  progress_bar=self.progress_bar)
@@ -1630,6 +1685,7 @@ class _FeatureEngineering:
1630
1685
  if apply_method in (["sininv","sigmoid"]):
1631
1686
  # Applying numapply transformation
1632
1687
  self.data = self._numapply_transformation(col,transform_val)
1688
+ self.data_mapping[f'{apply_method}_transformed_data'] = self.data._table_name
1633
1689
  self._display_msg(msg="Updated dataset sample after applying numapply transformation:",
1634
1690
  data=self.data,
1635
1691
  progress_bar=self.progress_bar)
@@ -1734,6 +1790,10 @@ class _FeatureEngineering:
1734
1790
  transform_params["persist"] = False
1735
1791
  self.data = NonLinearCombineTransform(**transform_params).result
1736
1792
 
1793
+ self.data_mapping[f'fit_nonlinear_{comb}_output'] = fit_obj.output_data._table_name
1794
+ self.data_mapping[f'fit_nonlinear_{comb}_result'] = fit_obj.result._table_name
1795
+ self.data_mapping['non_linear_transformed_data'] = self.data._table_name
1796
+
1737
1797
  if not volatile and not persist:
1738
1798
  # Adding transformed data containing table to garbage collector
1739
1799
  GarbageCollector._add_to_garbagecollector(self.data._table_name)