teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of teradataml might be problematic.

Files changed (126)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +315 -2
  3. teradataml/__init__.py +4 -0
  4. teradataml/_version.py +1 -1
  5. teradataml/analytics/analytic_function_executor.py +95 -8
  6. teradataml/analytics/byom/__init__.py +1 -1
  7. teradataml/analytics/json_parser/metadata.py +12 -3
  8. teradataml/analytics/json_parser/utils.py +7 -2
  9. teradataml/analytics/sqle/__init__.py +5 -1
  10. teradataml/analytics/table_operator/__init__.py +1 -1
  11. teradataml/analytics/uaf/__init__.py +1 -1
  12. teradataml/analytics/utils.py +4 -0
  13. teradataml/analytics/valib.py +18 -4
  14. teradataml/automl/__init__.py +51 -6
  15. teradataml/automl/data_preparation.py +59 -35
  16. teradataml/automl/data_transformation.py +58 -33
  17. teradataml/automl/feature_engineering.py +27 -12
  18. teradataml/automl/model_training.py +73 -46
  19. teradataml/common/constants.py +88 -29
  20. teradataml/common/garbagecollector.py +2 -1
  21. teradataml/common/messagecodes.py +19 -3
  22. teradataml/common/messages.py +6 -1
  23. teradataml/common/sqlbundle.py +64 -12
  24. teradataml/common/utils.py +246 -47
  25. teradataml/common/warnings.py +11 -0
  26. teradataml/context/context.py +161 -27
  27. teradataml/data/amazon_reviews_25.csv +26 -0
  28. teradataml/data/byom_example.json +11 -0
  29. teradataml/data/dataframe_example.json +18 -2
  30. teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
  31. teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
  32. teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
  33. teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
  34. teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
  35. teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
  36. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
  37. teradataml/data/docs/sqle/docs_17_20/Shap.py +34 -6
  38. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
  39. teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
  40. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  41. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  42. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
  43. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
  44. teradataml/data/hnsw_alter_data.csv +5 -0
  45. teradataml/data/hnsw_data.csv +10 -0
  46. teradataml/data/jsons/byom/h2opredict.json +1 -1
  47. teradataml/data/jsons/byom/onnxembeddings.json +266 -0
  48. teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
  49. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
  50. teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
  51. teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
  52. teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
  53. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  54. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  55. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  56. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  57. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  58. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
  59. teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
  60. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
  61. teradataml/data/medical_readings.csv +101 -0
  62. teradataml/data/patient_profile.csv +101 -0
  63. teradataml/data/scripts/lightgbm/dataset.template +157 -0
  64. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
  65. teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
  66. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
  67. teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
  68. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
  69. teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
  70. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
  71. teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
  72. teradataml/data/target_udt_data.csv +8 -0
  73. teradataml/data/templates/open_source_ml.json +3 -2
  74. teradataml/data/teradataml_example.json +8 -0
  75. teradataml/data/vectordistance_example.json +4 -0
  76. teradataml/dataframe/copy_to.py +8 -3
  77. teradataml/dataframe/data_transfer.py +11 -1
  78. teradataml/dataframe/dataframe.py +1049 -285
  79. teradataml/dataframe/dataframe_utils.py +152 -20
  80. teradataml/dataframe/functions.py +578 -35
  81. teradataml/dataframe/setop.py +11 -6
  82. teradataml/dataframe/sql.py +185 -16
  83. teradataml/dbutils/dbutils.py +1049 -115
  84. teradataml/dbutils/filemgr.py +48 -1
  85. teradataml/hyperparameter_tuner/optimizer.py +12 -1
  86. teradataml/lib/aed_0_1.dll +0 -0
  87. teradataml/opensource/__init__.py +1 -1
  88. teradataml/opensource/_base.py +1466 -0
  89. teradataml/opensource/_class.py +464 -0
  90. teradataml/opensource/{sklearn/constants.py → _constants.py} +21 -14
  91. teradataml/opensource/_lightgbm.py +949 -0
  92. teradataml/opensource/_sklearn.py +1008 -0
  93. teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +5 -6
  94. teradataml/options/__init__.py +54 -38
  95. teradataml/options/configure.py +131 -27
  96. teradataml/options/display.py +13 -2
  97. teradataml/plot/axis.py +47 -8
  98. teradataml/plot/figure.py +33 -0
  99. teradataml/plot/plot.py +63 -13
  100. teradataml/scriptmgmt/UserEnv.py +5 -5
  101. teradataml/scriptmgmt/lls_utils.py +130 -40
  102. teradataml/store/__init__.py +12 -0
  103. teradataml/store/feature_store/__init__.py +0 -0
  104. teradataml/store/feature_store/constants.py +291 -0
  105. teradataml/store/feature_store/feature_store.py +2318 -0
  106. teradataml/store/feature_store/models.py +1505 -0
  107. teradataml/table_operators/Apply.py +32 -18
  108. teradataml/table_operators/Script.py +3 -1
  109. teradataml/table_operators/TableOperator.py +3 -1
  110. teradataml/table_operators/query_generator.py +3 -0
  111. teradataml/table_operators/table_operator_query_generator.py +3 -1
  112. teradataml/table_operators/table_operator_util.py +37 -38
  113. teradataml/table_operators/templates/dataframe_register.template +69 -0
  114. teradataml/utils/dtypes.py +51 -2
  115. teradataml/utils/internal_buffer.py +18 -0
  116. teradataml/utils/validators.py +99 -8
  117. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/METADATA +321 -5
  118. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/RECORD +121 -94
  119. teradataml/libaed_0_1.dylib +0 -0
  120. teradataml/libaed_0_1.so +0 -0
  121. teradataml/opensource/sklearn/__init__.py +0 -1
  122. teradataml/opensource/sklearn/_class.py +0 -255
  123. teradataml/opensource/sklearn/_sklearn_wrapper.py +0 -1800
  124. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/WHEEL +0 -0
  125. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/top_level.txt +0 -0
  126. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/zip-safe +0 -0
teradataml/automl/feature_engineering.py
@@ -40,6 +40,9 @@ from teradataml.common.garbagecollector import GarbageCollector
 from teradataml.dataframe.sql_functions import case
 from teradataml.hyperparameter_tuner.utils import _ProgressBar
 from teradataml.utils.validators import _Validators
+from teradataml.common.utils import UtilFuncs
+from teradataml.common.constants import TeradataConstants
+from teradataml.options.configure import configure
 
 
 class _FeatureEngineering:
@@ -131,8 +134,9 @@ class _FeatureEngineering:
         self.data_transform_dict = {}
         self.one_hot_obj_count = 0
         self.is_classification_type = lambda: self.task_type.upper() == 'CLASSIFICATION'
-        self.volatile = kwargs.get('volatile', False)
         self.persist = kwargs.get('persist', False)
+        self.volatile = kwargs.get('volatile', False) or (configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE and self.persist is False)
+
 
     # Method for doing feature engineering on data -> adding id, removing futile col, imputation, encoding(one hot)
     def feature_engineering(self,
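
Note on the hunk above: AutoML objects now also honor the session-wide configure.temp_object_type option when the caller does not explicitly request volatile or persisted output. A condensed restatement of the added expression, pulled into a standalone helper purely for illustration (the helper name is not part of the package):

    from teradataml.options.configure import configure
    from teradataml.common.constants import TeradataConstants

    def _resolve_volatile(volatile=False, persist=False):
        # Explicit volatile=True always wins; otherwise fall back to the
        # session-wide temp-object setting, unless the caller asked to persist.
        return volatile or (configure.temp_object_type ==
                            TeradataConstants.TERADATA_VOLATILE_TABLE
                            and persist is False)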
@@ -259,6 +263,11 @@ class _FeatureEngineering:
            Returns extracted elements in form of list.
 
        """
+       # Ensure list1 and list2 are lists, default to empty list if None
+       if list1 is None:
+           list1 = []
+       if list2 is None:
+           list2 = []
        new_lst = list(set(list1) - set(list2))
        return new_lst
 
@@ -273,7 +282,7 @@ class _FeatureEngineering:
                           show_data=True)
         start_time = time.time()
         rows = self.data.shape[0]
-        self.data=self.data.drop_duplicate()
+        self.data=self.data.drop_duplicate(self.data.columns)
         if rows != self.data.shape[0]:
             self._display_msg(msg=f'Updated dataset sample after removing {rows-self.data.shape[0]} duplicate records:',
                               data=self.data,
@@ -347,12 +356,10 @@ class _FeatureEngineering:
 
         # Detecting and removing futile columns, if categorical_column exists
         if len(categorical_columns) != 0:
-
             obj = CategoricalSummary(data=self.data,
                                      target_columns=categorical_columns,
                                      volatile=self.volatile,
                                      persist=self.persist)
-
             gfc_out = GetFutileColumns(data=self.data,
                                        object=obj,
                                        category_summary_column="ColumnName",
@@ -565,11 +572,18 @@ class _FeatureEngineering:
 
         # Removing rows with missing target column value
         self.data = self.data.dropna(subset=[self.target_column])
+
+        params = {
+            "data": self.data,
+            "target_columns": self.data.columns,
+            "persist": True,
+            "display_table_name": False
+        }
 
-        obj = ColumnSummary(data=self.data,
-                            target_columns=self.data.columns,
-                            volatile=self.volatile,
-                            persist=self.persist)
+        obj = ColumnSummary(**params)
+
+        # Adding transformed data containing table to garbage collector
+        GarbageCollector._add_to_garbagecollector(obj.result._table_name)
 
         cols_miss_val={}
         # Iterating over each row in the column summary result
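
Note: ColumnSummary output is now always persisted (persist=True) instead of following the caller's volatile/persist flags, so the hunk also registers the backing table with the garbage collector to keep it from leaking. A minimal sketch of the pattern, assuming a connected session and a teradataml DataFrame df (_add_to_garbagecollector is a private API, shown only to mirror the change):

    from teradataml import ColumnSummary
    from teradataml.common.garbagecollector import GarbageCollector

    obj = ColumnSummary(data=df, target_columns=df.columns,
                        persist=True, display_table_name=False)
    # Register the persisted result table so it is dropped during cleanup.
    GarbageCollector._add_to_garbagecollector(obj.result._table_name)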
@@ -704,7 +718,7 @@ class _FeatureEngineering:
         for key, val in self.imputation_cols.items():
 
             col_stat.append(key)
-            if self.data_types[key] in ['float', 'int']:
+            if self.data_types[key] in ['float', 'int', 'decimal.Decimal']:
                 val = skew_data[f'skew_{key}']
                 # Median imputation method, if abs(skewness value) > 1
                 if abs(val) > 1:
@@ -713,7 +727,7 @@ class _FeatureEngineering:
                 else:
                     stat.append('mean')
             # Mode imputation method, if categorical column
-            else:
+            elif self.data_types[key] in ['str']:
                 stat.append('mode')
 
         self._display_msg(msg="Columns with their imputation method:",
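
Note: with these two hunks, decimal columns are imputed like the other numeric types, and only string columns fall through to mode imputation; any other type is now skipped rather than mode-imputed. The selection rule, restated as a small standalone function (illustrative, not package API):

    def pick_imputation(dtype_name, skew=None):
        # Numeric columns: median for heavily skewed data, mean otherwise.
        if dtype_name in ('float', 'int', 'decimal.Decimal'):
            return 'median' if abs(skew) > 1 else 'mean'
        # Categorical (string) columns: most frequent value.
        if dtype_name in ('str',):
            return 'mode'
        return None  # other types no longer receive an imputation method

    assert pick_imputation('decimal.Decimal', skew=2.3) == 'median'
    assert pick_imputation('int', skew=0.4) == 'mean'
    assert pick_imputation('str') == 'mode'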
@@ -1802,10 +1816,11 @@ class _FeatureEngineering:
         RETURNS:
             Tuple containing volatile and persist parameters.
         """
-        volatile = self.volatile
+        # Prioritizing persist argument and then volatile
         persist = self.persist
+        volatile = self.volatile or (configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE and persist is False)
         if self.custom_data is not None and self.custom_data.get(func_indicator, False):
             volatile = self.custom_data[param_name].get("volatile", False)
             persist = self.custom_data[param_name].get("persist", False)
 
-        return (volatile, persist)
+        return (volatile, persist)
teradataml/automl/model_training.py
@@ -26,9 +26,10 @@ from teradataml.context import context as tdmlctx
 from teradataml.dataframe.copy_to import copy_to_sql
 from teradataml.dataframe.dataframe import DataFrame
 from teradataml import execute_sql, get_connection
-from teradataml import SVM, GLM, DecisionForest, XGBoost, GridSearch, KNN, RandomSearch
+from teradataml import configure, SVM, GLM, DecisionForest, XGBoost, GridSearch, KNN, RandomSearch
 from teradataml.utils.validators import _Validators
-
+from teradataml.common.utils import UtilFuncs
+from teradataml.common.constants import TeradataConstants
 
 class _ModelTraining:
 
@@ -113,6 +114,12 @@ class _ModelTraining:
                 session.
                 Default Value: False
                 Types: bool
+
+            seed:
+                Optional Argument.
+                Specifies the random seed for reproducibility.
+                Default Value: 42
+                Types: int
         """
         self.data = data
         self.target_column = target_column
@@ -125,6 +132,7 @@ class _ModelTraining:
         self.startify_col = None
         self.persist = kwargs.get("persist", False)
         self.volatile = kwargs.get("volatile", False)
+        self.seed = kwargs.get("seed", 42)
 
     def model_training(self,
                        auto=True,
@@ -498,7 +506,7 @@ class _ModelTraining:
             'max_depth': tuple(max_depth),
             'min_node_size': tuple(min_node_size),
             'iter_num': tuple(iter_num),
-            'seed':42
+            'seed':self.seed
         }
         # Hyperparameters for Decision Forest model
         df_params = {
@@ -509,7 +517,7 @@ class _ModelTraining:
             'max_depth': tuple(max_depth),
             'min_node_size': tuple(min_node_size),
             'num_trees': tuple(num_trees),
-            'seed':42
+            'seed':self.seed
         }
 
         # Updating model type in case of classification
@@ -796,7 +804,8 @@ class _ModelTraining:
         trained_models = []
         for param in model_params:
             result = self._hyperparameter_tunning(param, trainng_datas)
-            trained_models.append(result)
+            if result is not None:
+                trained_models.append(result)
 
         models_df = pd.concat(trained_models, ignore_index=True)
         return models_df
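
Note: _hyperparameter_tunning can now return None when no model passes (see the large hunk below), so failed runs are filtered out before concatenation. The package still concatenates unconditionally, and pandas raises ValueError on an empty list; a defensive variant of the same idea would guard that case:

    import pandas as pd

    results = [pd.DataFrame({'MODEL_ID': ['m1']}), None]  # hypothetical run outcomes
    trained_models = [r for r in results if r is not None]
    if trained_models:  # pd.concat([]) raises "No objects to concatenate"
        models_df = pd.concat(trained_models, ignore_index=True)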
@@ -872,53 +881,71 @@ class _ModelTraining:
 
             verbose = 0
         # Hyperparameter tunning
+        # Parallel run opens multiple connections for parallel execution,
+        # but volatile tables are not accessible across different sessions.
+        # Therefore, execution is performed sequentially by setting run_parallel=False.
+
+        run_parallel = configure.temp_object_type != TeradataConstants.TERADATA_VOLATILE_TABLE
+
+        common_params = {
+            "data": train_data,
+            "evaluation_metric": self.stopping_metric,
+            "early_stop": self.stopping_tolerance,
+            "run_parallel": run_parallel,
+            "sample_seed": self.seed,
+            "sample_id_column": "id",
+            "discard_invalid_column_params": True,
+            "stratify_column": self.startify_col,
+            "verbose": verbose,
+            "max_time": self.max_runtime_secs,
+            "suppress_refer_msg": True
+        }
+
         if model_param['name'] == 'knn':
-            _obj.fit(data=train_data, evaluation_metric=self.stopping_metric,
-                     early_stop=self.stopping_tolerance, run_parallel=True,
-                     sample_seed=42, sample_id_column='id', discard_invalid_column_params=True,
-                     stratify_column=self.startify_col,verbose=verbose, max_time=self.max_runtime_secs)
+            _obj.fit(**common_params)
         else:
-            _obj.fit(data=train_data, evaluation_metric=self.stopping_metric,
-                     early_stop=self.stopping_tolerance, **eval_params,
-                     run_parallel=True, discard_invalid_column_params=True, sample_seed=42,
-                     sample_id_column='id',stratify_column=self.startify_col, verbose=verbose, max_time=self.max_runtime_secs)
+            _obj.fit(**common_params, **eval_params)
 
         # Getting all passed models
         model_info = _obj.model_stats.merge(_obj.models[_obj.models['STATUS']=='PASS'][['MODEL_ID', 'DATA_ID', 'PARAMETERS']],
                                             on='MODEL_ID', how='inner')
-        # Creating mapping data ID to feature selection method
-        data_id_to_table_map = {"DF_0": ('lasso', train_data[0]._table_name),
-                                "DF_1": ('rfe', train_data[1]._table_name),
-                                "DF_2": ('pca', train_data[2]._table_name)}
-
-        # Updating model stats with feature selection method and result table
-        for index, row in model_info.iterrows():
-            model_info.loc[index, 'FEATURE_SELECTION'] = data_id_to_table_map[row['DATA_ID']][0]
-            model_info.loc[index, 'DATA_TABLE'] = data_id_to_table_map[row['DATA_ID']][1]
-            model_info.loc[index, 'RESULT_TABLE'] = _obj.get_model(row['MODEL_ID']).result._table_name
-            model_info.loc[index, 'model-obj'] = _obj.get_model(row['MODEL_ID'])
-
-        # Dropping column 'DATA_ID'
-        model_info.drop(['DATA_ID'], axis=1, inplace=True)
-
-        model_info.insert(1, 'FEATURE_SELECTION', model_info.pop('FEATURE_SELECTION'))
-
-        if not self.is_classification_type():
-            # Calculating Adjusted-R2 for regression
-            # Getting size and feature count for each feature selection method
-            methods = ["lasso", "rfe", "pca"]
-            size_map = {method : df.select('id').size for method, df in zip(methods, train_data)}
-            feature_count_map = {method : len(df.columns) - 2 for method, df in zip(methods, train_data)}
-            model_info['ADJUSTED_R2'] = model_info.apply(lambda row:
-                1 - ((1 - row['R2']) * (size_map[row['FEATURE_SELECTION']] - 1) /
-                     (size_map[row['FEATURE_SELECTION']] - feature_count_map[row['FEATURE_SELECTION']] - 1)), axis=1)
-
-        self._display_msg(msg="-"*100,
-                          progress_bar=self.progress_bar,
-                          show_data=True)
-        self.progress_bar.update()
+        if not model_info.empty:
+            # Creating mapping data ID to feature selection method
+            data_id_to_table_map = {"DF_0": ('lasso', train_data[0]._table_name),
+                                    "DF_1": ('rfe', train_data[1]._table_name),
+                                    "DF_2": ('pca', train_data[2]._table_name)}
+
+            # Updating model stats with feature selection method and result table
+            for index, row in model_info.iterrows():
+                model_info.loc[index, 'FEATURE_SELECTION'] = data_id_to_table_map[row['DATA_ID']][0]
+                model_info.loc[index, 'DATA_TABLE'] = data_id_to_table_map[row['DATA_ID']][1]
+                model_info.loc[index, 'RESULT_TABLE'] = _obj.get_model(row['MODEL_ID']).result._table_name
+                model_info.loc[index, 'model-obj'] = _obj.get_model(row['MODEL_ID'])
+
+            # Dropping column 'DATA_ID'
+            model_info.drop(['DATA_ID'], axis=1, inplace=True)
 
-        return model_info
+            model_info.insert(1, 'FEATURE_SELECTION', model_info.pop('FEATURE_SELECTION'))
+
+            if not self.is_classification_type():
+                # Calculating Adjusted-R2 for regression
+                # Getting size and feature count for each feature selection method
+                methods = ["lasso", "rfe", "pca"]
+                size_map = {method : df.select('id').size for method, df in zip(methods, train_data)}
+                feature_count_map = {method : len(df.columns) - 2 for method, df in zip(methods, train_data)}
+                model_info['ADJUSTED_R2'] = model_info.apply(lambda row:
+                    1 - ((1 - row['R2']) * (size_map[row['FEATURE_SELECTION']] - 1) /
+                         (size_map[row['FEATURE_SELECTION']] - feature_count_map[row['FEATURE_SELECTION']] - 1)), axis=1)
+
+            self._display_msg(msg="-"*100,
+                              progress_bar=self.progress_bar,
+                              show_data=True)
+            self.progress_bar.update()
+
+            return model_info
+
+        # Returning None, if no model is passed
+        return None
 
     @staticmethod
     def _eval_params_generation(ml_name,
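
Note: the run_parallel gate above exists because Teradata volatile tables are session-scoped, while parallel tuning opens additional connections. An illustration of the failure mode it avoids, assuming two live teradataml sessions (the table name is hypothetical):

    from teradataml import execute_sql

    # Session 1: a volatile table lives only in the session that created it.
    execute_sql("CREATE VOLATILE TABLE vt_demo (c1 INTEGER) ON COMMIT PRESERVE ROWS;")
    # Session 2 (a second connection, as opened by run_parallel=True) cannot
    # see vt_demo; selecting from it there fails with an "object does not
    # exist" style error, which is exactly what a parallel fit would hit.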
@@ -986,4 +1013,4 @@ class _ModelTraining:
         elif ml_name == 'glm':
             eval_params['family'] = 'GAUSSIAN'
 
-        return eval_params
+        return eval_params
teradataml/common/constants.py
@@ -14,10 +14,17 @@ A class for holding all constants
 import re
 import sqlalchemy
 from enum import Enum
-from teradataml.options.configure import configure
 from teradatasqlalchemy.types import (INTEGER, SMALLINT, BIGINT, BYTEINT, DECIMAL, FLOAT, NUMBER, VARCHAR)
 from teradatasqlalchemy.types import (DATE, TIME, TIMESTAMP)
 from teradatasqlalchemy.types import (BYTE, VARBYTE, BLOB)
+from teradatasqlalchemy import (CHAR, CLOB)
+from teradatasqlalchemy import (PERIOD_DATE, PERIOD_TIME, PERIOD_TIMESTAMP)
+from teradatasqlalchemy import (INTERVAL_YEAR, INTERVAL_YEAR_TO_MONTH, INTERVAL_MONTH,
+                                INTERVAL_DAY, INTERVAL_DAY_TO_HOUR, INTERVAL_DAY_TO_MINUTE,
+                                INTERVAL_DAY_TO_SECOND, INTERVAL_HOUR,
+                                INTERVAL_HOUR_TO_MINUTE, INTERVAL_HOUR_TO_SECOND,
+                                INTERVAL_MINUTE, INTERVAL_MINUTE_TO_SECOND,
+                                INTERVAL_SECOND)
 from teradatasqlalchemy import (GEOMETRY, MBR, MBB)
 
 
@@ -53,6 +60,9 @@ class SQLConstants(Enum):
     SQL_DELETE_ALL_ROWS = 29
     SQL_DELETE_SPECIFIC_ROW = 30
     SQL_EXEC_STORED_PROCEDURE = 31
+    SQL_SELECT_COLUMNNAMES_WITH_WHERE = 32
+    SQL_HELP_DATABASE = 33
+    SQL_HELP_DATALAKE = 34
     CONSTRAINT = ["check_constraint", "primary_key_constraint",
                   "foreign_key_constraint", "unique_key_constraint"]
 
@@ -123,6 +133,14 @@ class TeradataTypes(Enum):
     TD_DATE_TYPES = [DATE, sqlalchemy.sql.sqltypes.Date]
     TD_DATE_CODES = ["DA"]
     TD_NULL_TYPE = "NULLTYPE"
+    TD_ALL_TYPES = (BYTEINT, SMALLINT, INTEGER, BIGINT, DECIMAL, FLOAT, NUMBER,
+                    TIMESTAMP, DATE, TIME, CHAR, VARCHAR, CLOB, BYTE, VARBYTE,
+                    BLOB, PERIOD_DATE, PERIOD_TIME, PERIOD_TIMESTAMP,
+                    INTERVAL_YEAR, INTERVAL_YEAR_TO_MONTH, INTERVAL_MONTH,
+                    INTERVAL_DAY, INTERVAL_DAY_TO_HOUR, INTERVAL_DAY_TO_MINUTE,
+                    INTERVAL_DAY_TO_SECOND, INTERVAL_HOUR,
+                    INTERVAL_HOUR_TO_MINUTE, INTERVAL_HOUR_TO_SECOND,
+                    INTERVAL_MINUTE, INTERVAL_MINUTE_TO_SECOND, INTERVAL_SECOND)
 
 
 class TeradataTableKindConstants(Enum):
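
Note: keeping TD_ALL_TYPES as a tuple of type classes makes membership checks cheap. A plausible use (hypothetical, not a call site shown in this diff), remembering that Enum members expose the tuple through .value:

    from teradatasqlalchemy.types import INTEGER
    from teradataml.common.constants import TeradataTypes

    col_type = INTEGER()
    # isinstance accepts a tuple of classes directly.
    is_td_type = isinstance(col_type, TeradataTypes.TD_ALL_TYPES.value)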
@@ -427,6 +445,8 @@ class TableOperatorConstants(Enum):
     APPLY_TEMPLATE = "dataframe_apply.template"
     # Template of the intermediate script that will be generated for UDF.
     UDF_TEMPLATE = "dataframe_udf.template"
+    # Template of the intermediate script that will be generated for register.
+    REGISTER_TEMPLATE = "dataframe_register.template"
     # In-DB execution mode.
     INDB_EXEC = "IN-DB"
     # Local execution mode.
@@ -443,6 +463,8 @@ class TableOperatorConstants(Enum):
     APPLY_OP = "apply"
     # udf operation.
     UDF_OP = "udf"
+    # register operation.
+    REGISTER_OP = "register"
     # Template of the script_executor that will be used to generate the temporary script_executor file.
     SCRIPT_TEMPLATE = "script_executor.template"
     # Log Type.
@@ -464,11 +486,18 @@ class TableOperatorConstants(Enum):
 
     # Check if Python interpretor and add-ons are installed or not.
     # Location of In-DB packages is indicated by configure.indb_install_location.
+    # Check for both python and pip versions.
     CHECK_PYTHON_INSTALLED = """SELECT distinct * FROM SCRIPT(
                                 ON (select 1) PARTITION BY ANY
-                                SCRIPT_COMMAND('{}/bin/pip3 --version')
-                                returns('package VARCHAR(256)'))
+                                SCRIPT_COMMAND('echo $({0}/bin/pip3 --version) -- $({0}/bin/python3 --version)')
+                                returns('pip VARCHAR(256)'))
                              """
+    # Check which version of rpms are installed.
+    INDB_PYTHON_PATH = """SEL DISTINCT os_ver
+                          FROM SCRIPT(
+                          SCRIPT_COMMAND('grep CPE_NAME /etc/os-release')
+                          RETURNS('os_ver VARCHAR(100)')
+                          );"""
 
     # Script Query to get Python packages and corresponding versions.
     # Location of In-DB packages is indicated by configure.indb_install_location.
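
Note: the reworked query reports the pip and python versions in a single SCRIPT row by echoing two shell command substitutions. With {0} filled in from configure.indb_install_location, the command expands along these lines (the path below is illustrative only):

    install_loc = "/opt/teradata/languages/Python"  # stand-in for configure.indb_install_location
    cmd = "echo $({0}/bin/pip3 --version) -- $({0}/bin/python3 --version)".format(install_loc)
    # The shell substitutes both versions, so one VARCHAR column carries
    # e.g. "pip 23.x from ... -- Python 3.x.y" back to the client.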
@@ -480,6 +509,9 @@ class TableOperatorConstants(Enum):
                            "delimiter(' ') " \
                            "returns('package VARCHAR({2}), " \
                            "version VARCHAR({2})'))"
+
+    SCRIPT_LIST_FILES_QUERY = "SELECT DISTINCT * FROM SCRIPT (SCRIPT_COMMAND " \
+                              "('ls ./{}') RETURNS ('Files VARCHAR({})'))"
 
 class ValibConstants(Enum):
     # A dictionary that maps teradataml name of the exposed VALIB function name
@@ -778,7 +810,8 @@ class ValibConstants(Enum):
             "subdivision_method": "subdivisionmethod",
             "subdivision_threshold": "subdivisionthreshold",
             "filter": "where",
-            "gen_sql_only": "gensqlonly"
+            "gen_sql_only": "gensqlonly",
+            "charset": "charset"
         },
 
         "DATAEXPLORER": {
@@ -795,7 +828,8 @@ class ValibConstants(Enum):
             "stats_options": "statsoptions",
             "distinct": "uniques",
             "filter": "where",
-            "gen_sql": "gensql"
+            "gen_sql": "gensql",
+            "charset": "charset"
         },
 
         "FREQUENCY": {
@@ -809,7 +843,8 @@ class ValibConstants(Enum):
             "style": "style",
             "top_n": "topvalues",
             "filter": "where",
-            "gen_sql_only": "gensqlonly"
+            "gen_sql_only": "gensqlonly",
+            "charset": "charset"
         },
 
         "HISTOGRAM": {
@@ -824,7 +859,8 @@ class ValibConstants(Enum):
             "stats_columns": "statisticscolumns",
             "hist_style": "style",
             "filter": "where",
-            "gen_sql_only": "gensqlonly"
+            "gen_sql_only": "gensqlonly",
+            "charset": "charset"
         },
 
         "STATISTICS": {
@@ -835,7 +871,8 @@ class ValibConstants(Enum):
             "statistical_method": "statisticalmethod",
             "stats_options": "statsoptions",
             "filter": "where",
-            "gen_sql_only": "gensqlonly"
+            "gen_sql_only": "gensqlonly",
+            "charset": "charset"
         },
 
         "TEXTFIELDANALYZER": {
@@ -843,7 +880,8 @@ class ValibConstants(Enum):
             "exclude_columns": "columnstoexclude",
             "analyze_numerics": "extendednumericanalysis",
             "analyze_unicode": "extendedunicodeanalysis",
-            "gen_sql_only": "gensqlonly"
+            "gen_sql_only": "gensqlonly",
+            "charset": "charset"
         },
 
         "VALUES": {
@@ -852,7 +890,8 @@ class ValibConstants(Enum):
             "group_columns": "groupby",
             "distinct": "uniques",
             "filter": "where",
-            "gen_sql_only": "gensqlonly"
+            "gen_sql_only": "gensqlonly",
+            "charset": "charset"
         },
 
         "ASSOCIATION": {
@@ -877,7 +916,8 @@ class ValibConstants(Enum):
             "filter": "where",
             "no_support_results": "dropsupporttables",
             "support_result_prefix": "resulttableprefix",
-            "gen_sql_only": "gensqlonly"
+            "gen_sql_only": "gensqlonly",
+            "charset": "charset"
         },
 
         "KMEANS": {
@@ -887,7 +927,8 @@ class ValibConstants(Enum):
             "continuation": "continuation",
             "max_iter": "iterations",
             "operator_database": "operatordatabase",
-            "threshold": "threshold"
+            "threshold": "threshold",
+            "charset": "charset"
         },
 
         "KMEANSSCORE": {
@@ -895,7 +936,8 @@ class ValibConstants(Enum):
             "cluster_column": "clustername",
             "fallback": "fallback",
             "operator_database": "operatordatabase",
-            "accumulate": "retain"
+            "accumulate": "retain",
+            "charset": "charset"
         },
 
         "DECISIONTREE": {
@@ -907,7 +949,8 @@ class ValibConstants(Enum):
             "max_depth": "max_depth",
             "num_splits": "min_records",
             "operator_database": "operatordatabase",
-            "pruning": "pruning"
+            "pruning": "pruning",
+            "charset": "charset"
         },
 
         "DECISIONTREESCORE": {
@@ -917,7 +960,8 @@ class ValibConstants(Enum):
             "profile": "profiletables",
             "accumulate": "retain",
             "targeted_value": "targetedvalue",
-            "gen_sql_only": "gensqlonly"
+            "gen_sql_only": "gensqlonly",
+            "charset": "charset"
         },
 
         "MATRIX": {
@@ -927,7 +971,8 @@ class ValibConstants(Enum):
             "matrix_output": "matrixoutput",
             "type": "matrixtype",
             "handle_nulls": "nullhandling",
-            "filter": "where"
+            "filter": "where",
+            "charset": "charset"
         },
 
         "LINEAR": {
@@ -949,7 +994,8 @@ class ValibConstants(Enum):
             "stepwise": "stepwise",
             "use_fstat": "usefstat",
             "use_pvalue": "usepvalue",
-            "variance_prop_threshold": "varianceproportionthreshold"
+            "variance_prop_threshold": "varianceproportionthreshold",
+            "charset": "charset"
         },
 
         "LINEARSCORE": {
@@ -957,7 +1003,8 @@ class ValibConstants(Enum):
             "response_column": "predicted",
             "residual_column": "residual",
             "accumulate": "retain",
-            "gen_sql_only": "gensqlonly"
+            "gen_sql_only": "gensqlonly",
+            "charset": "charset"
         },
 
         "LOGISTIC": {
@@ -987,7 +1034,8 @@ class ValibConstants(Enum):
             "end_threshold": "thresholdend",
             "increment_threshold": "thresholdincrement",
             "threshold_output": "thresholdtable",
-            "variance_prop_threshold": "varianceproportionthreshold"
+            "variance_prop_threshold": "varianceproportionthreshold",
+            "charset": "charset"
         },
 
         "LOGISTICSCORE": {
@@ -999,7 +1047,8 @@ class ValibConstants(Enum):
             "start_threshold": "thresholdbegin",
             "end_threshold": "thresholdend",
             "increment_threshold": "thresholdincrement",
-            "gen_sql_only": "gensqlonly"
+            "gen_sql_only": "gensqlonly",
+            "charset": "charset"
 
             # The following 3 arguments three should not be present for LogRegPredict function
             # where as when the function is LogRegEvaluator, at least one of these should be
@@ -1027,13 +1076,15 @@ class ValibConstants(Enum):
             "rotation_type": "rotationtype",
             "load_threshold": "thresholdloading",
             "percent_threshold": "thresholdpercent",
-            "variance_prop_threshold": "varianceproportionthreshold"
+            "variance_prop_threshold": "varianceproportionthreshold",
+            "charset": "charset"
         },
 
         "FACTORSCORE": {
             "index_columns": "index",
             "accumulate": "retain",
-            "gen_sql_only": "gensqlonly"
+            "gen_sql_only": "gensqlonly",
+            "charset": "charset"
         },
 
         "PARAMETRICTEST": {
@@ -1052,7 +1103,8 @@ class ValibConstants(Enum):
             "style": "teststyle",
             "probability_threshold": "thresholdprobability",
             "with_indicator": "withindicator",
-            "gen_sql_only": "gensqlonly"
+            "gen_sql_only": "gensqlonly",
+            "charset": "charset"
         },
 
         "BINOMIALTEST": {
@@ -1067,7 +1119,8 @@ class ValibConstants(Enum):
             "stats_database": "statsdatabase",
             "style": "teststyle",
             "probability_threshold": "thresholdprobability",
-            "gen_sql_only": "gensqlonly"
+            "gen_sql_only": "gensqlonly",
+            "charset": "charset"
         },
 
         "KSTEST": {
@@ -1079,7 +1132,8 @@ class ValibConstants(Enum):
             "stats_database": "statsdatabase",
             "style": "teststyle",
             "probability_threshold": "thresholdprobability",
-            "gen_sql_only": "gensqlonly"
+            "gen_sql_only": "gensqlonly",
+            "charset": "charset"
         },
 
         "CHISQUARETEST": {
@@ -1093,7 +1147,8 @@ class ValibConstants(Enum):
             "stats_database": "statsdatabase",
             "style": "teststyle",
             "probability_threshold": "thresholdprobability",
-            "gen_sql_only": "gensqlonly"
+            "gen_sql_only": "gensqlonly",
+            "charset": "charset"
         },
 
         "RANKTEST": {
@@ -1112,7 +1167,8 @@ class ValibConstants(Enum):
             "style": "teststyle",
             "probability_threshold": "thresholdprobability",
             "treatment_column": "treatmentcolumn",
-            "gen_sql_only": "gensqlonly"
+            "gen_sql_only": "gensqlonly",
+            "charset": "charset"
         },
 
         "VARTRAN": {
@@ -1123,13 +1179,15 @@ class ValibConstants(Enum):
             "allow_duplicates": "multiset",
             "nopi": "noindex",
             "filter": "whereclause",
-            "gen_sql_only": "gensqlonly"
+            "gen_sql_only": "gensqlonly",
+            "charset": "charset"
         },
 
         "REPORT": {
             "analysis_type": "analysistype",
             "filter": "where",
-            "gen_sql_only": "gensqlonly"
+            "gen_sql_only": "gensqlonly",
+            "charset": "charset"
         }
     }
 
@@ -1424,6 +1482,7 @@ class HTTPRequest(Enum):
     POST = "post"
     PUT = "put"
     DELETE = "delete"
+    PATCH = "patch"
 
 
 class AsyncStatusColumns(Enum):
teradataml/common/garbagecollector.py
@@ -520,7 +520,8 @@ class GarbageCollector():
                fileparts = file.split(GarbageCollector.__filenameseperator)
                hostname = fileparts[1]
                filepid = int(fileparts[2])
-               if hostname == tdmlctx.context._get_host_ip():
+               # Check for both host ip and hostname in case user passed hostname for creating connection.
+               if hostname == tdmlctx.context._get_host_ip() or hostname == tdmlctx.context._get_host():
                    if filepid == os.getpid() or not psutil.pid_exists(filepid):
                        tempfiles.append(filepath)
            except (IndexError, ValueError):