teradataml-20.0.0.3-py3-none-any.whl → teradataml-20.0.0.4-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of teradataml has been flagged as possibly problematic.

Files changed (84)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +119 -0
  3. teradataml/_version.py +1 -1
  4. teradataml/analytics/analytic_function_executor.py +18 -6
  5. teradataml/analytics/byom/__init__.py +1 -1
  6. teradataml/analytics/sqle/__init__.py +4 -1
  7. teradataml/analytics/valib.py +18 -4
  8. teradataml/automl/__init__.py +51 -6
  9. teradataml/automl/data_preparation.py +56 -33
  10. teradataml/automl/data_transformation.py +58 -33
  11. teradataml/automl/feature_engineering.py +12 -5
  12. teradataml/automl/model_training.py +34 -13
  13. teradataml/common/__init__.py +1 -2
  14. teradataml/common/constants.py +64 -40
  15. teradataml/common/messagecodes.py +13 -3
  16. teradataml/common/messages.py +4 -1
  17. teradataml/common/sqlbundle.py +40 -10
  18. teradataml/common/utils.py +113 -39
  19. teradataml/common/warnings.py +11 -0
  20. teradataml/context/context.py +141 -17
  21. teradataml/data/amazon_reviews_25.csv +26 -0
  22. teradataml/data/byom_example.json +11 -0
  23. teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
  24. teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
  25. teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
  26. teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
  27. teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
  28. teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
  29. teradataml/data/docs/sqle/docs_17_20/Shap.py +28 -6
  30. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
  31. teradataml/data/hnsw_alter_data.csv +5 -0
  32. teradataml/data/hnsw_data.csv +10 -0
  33. teradataml/data/jsons/byom/h2opredict.json +1 -1
  34. teradataml/data/jsons/byom/onnxembeddings.json +266 -0
  35. teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
  36. teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
  37. teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
  38. teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
  39. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +2 -2
  40. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +1 -1
  41. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +5 -5
  42. teradataml/data/teradataml_example.json +8 -0
  43. teradataml/data/vectordistance_example.json +1 -1
  44. teradataml/dataframe/copy_to.py +8 -3
  45. teradataml/dataframe/data_transfer.py +11 -1
  46. teradataml/dataframe/dataframe.py +517 -121
  47. teradataml/dataframe/dataframe_utils.py +152 -20
  48. teradataml/dataframe/functions.py +26 -11
  49. teradataml/dataframe/setop.py +11 -6
  50. teradataml/dataframe/sql.py +2 -2
  51. teradataml/dbutils/dbutils.py +525 -129
  52. teradataml/hyperparameter_tuner/optimizer.py +12 -1
  53. teradataml/opensource/{sklearn/_sklearn_wrapper.py → _base.py} +317 -1011
  54. teradataml/opensource/_class.py +141 -17
  55. teradataml/opensource/{constants.py → _constants.py} +7 -3
  56. teradataml/opensource/_lightgbm.py +52 -53
  57. teradataml/opensource/_sklearn.py +1008 -0
  58. teradataml/opensource/_wrapper_utils.py +5 -5
  59. teradataml/options/__init__.py +47 -15
  60. teradataml/options/configure.py +103 -25
  61. teradataml/options/display.py +13 -2
  62. teradataml/plot/axis.py +47 -8
  63. teradataml/plot/figure.py +33 -0
  64. teradataml/plot/plot.py +63 -13
  65. teradataml/scriptmgmt/UserEnv.py +2 -2
  66. teradataml/scriptmgmt/lls_utils.py +63 -26
  67. teradataml/store/__init__.py +1 -2
  68. teradataml/store/feature_store/feature_store.py +102 -7
  69. teradataml/table_operators/Apply.py +32 -18
  70. teradataml/table_operators/Script.py +3 -1
  71. teradataml/table_operators/TableOperator.py +3 -1
  72. teradataml/utils/dtypes.py +47 -0
  73. teradataml/utils/internal_buffer.py +18 -0
  74. teradataml/utils/validators.py +68 -9
  75. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.4.dist-info}/METADATA +123 -2
  76. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.4.dist-info}/RECORD +79 -75
  77. teradataml/data/SQL_Fundamentals.pdf +0 -0
  78. teradataml/libaed_0_1.dylib +0 -0
  79. teradataml/libaed_0_1.so +0 -0
  80. teradataml/opensource/sklearn/__init__.py +0 -0
  81. teradataml/store/vector_store/__init__.py +0 -1586
  82. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.4.dist-info}/WHEEL +0 -0
  83. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.4.dist-info}/top_level.txt +0 -0
  84. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.4.dist-info}/zip-safe +0 -0
teradataml/automl/data_transformation.py

@@ -31,8 +31,11 @@ from teradataml import ScaleTransform
 from teradataml import SimpleImputeTransform
 from teradataml import TargetEncodingTransform
 from teradataml import Transform, UtilFuncs, TeradataConstants
+from teradataml import execute_sql
 from teradataml.common.garbagecollector import GarbageCollector
 from teradataml.hyperparameter_tuner.utils import _ProgressBar
+from teradataml.options.configure import configure
+from teradataml.common.constants import TeradataConstants
 
 # AutoML Internal libraries
 from teradataml.automl.feature_exploration import _FeatureExplore
@@ -219,11 +222,11 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
         DESCRIPTION:
             Function drops irrelevent columns and adds id column.
         """
-        # Extracting irrelevent column list
+        # Extracting irrelevant column list
         columns_to_be_removed = self.data_transformation_params.get("drop_irrelevent_columns", None)
         if columns_to_be_removed:
             self.data = self.data.drop(columns_to_be_removed, axis=1)
-            self._display_msg(msg="\nUpdated dataset after dropping irrelevent columns :",
+            self._display_msg(msg="\nUpdated dataset after dropping irrelevant columns :",
                               data=self.data,
                               progress_bar=self.progress_bar)
 
@@ -693,22 +696,28 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
         lasso_scale_fit_obj = self.data_transformation_params.get("lasso_scale_fit_obj", None)
         lasso_scale_col = self.data_transformation_params.get("lasso_scale_col", None)
         # Extracting accumulate columns
-        accumulate_cols = self._extract_list(lasso_df.columns, lasso_scale_col)
-        # Scaling dataset
-        lasso_df = ScaleTransform(data=lasso_df,
-                                  object=lasso_scale_fit_obj,
-                                  accumulate=accumulate_cols).result
-        # Displaying scaled dataset
-        self._display_msg(msg="\nUpdated dataset after performing scaling on Lasso selected features :",
-                          data=lasso_df,
-                          progress_bar=self.progress_bar)
+        if lasso_scale_fit_obj is not None:
+            accumulate_cols = self._extract_list(lasso_df.columns, lasso_scale_col)
+            # Scaling dataset
+            lasso_df = ScaleTransform(data=lasso_df,
+                                      object=lasso_scale_fit_obj,
+                                      accumulate=accumulate_cols).result
+            # Displaying scaled dataset
+            self._display_msg(msg="\nUpdated dataset after performing scaling on Lasso selected features :",
+                              data=lasso_df,
+                              progress_bar=self.progress_bar)
 
         # Uploading lasso dataset to table for further use
         table_name = UtilFuncs._generate_temp_table_name(prefix="lasso_new_test",
                                                          table_type = TeradataConstants.TERADATA_TABLE)
+        # If configure.temp_object_type="VT", _generate_temp_table_name() retruns the
+        # table name in fully qualified format.
+        table_name = UtilFuncs._extract_table_name(table_name)
         # Storing table name mapping for lasso dataset
         self.table_name_mapping[self.data_node_id]["lasso_new_test"] = table_name
-        copy_to_sql(df = lasso_df, table_name= table_name, if_exists="replace")
+        # In the case of the VT option, the table was being persisted, so the VT condition is being checked.
+        is_temporary = configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE
+        copy_to_sql(df = lasso_df, table_name= table_name, if_exists="replace", temporary=is_temporary)
 
     def _feature_selection_rfe_transformation(self):
         """
@@ -730,23 +739,30 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
         # Extracting fit object and columns for scaling
         rfe_scale_fit_obj = self.data_transformation_params.get("rfe_scale_fit_obj", None)
         rfe_scale_col = self.data_transformation_params.get("rfe_scale_col", None)
-        # Extracting accumulate columns
-        accumulate_cols = self._extract_list(rfe_df.columns, rfe_scale_col)
-        # Scaling on rfe dataset
-        rfe_df = ScaleTransform(data=rfe_df,
-                                object=rfe_scale_fit_obj,
-                                accumulate=accumulate_cols).result
-        # Displaying scaled dataset
-        self._display_msg(msg="\nUpdated dataset after performing scaling on RFE selected features :",
-                          data=rfe_df,
-                          progress_bar=self.progress_bar)
+
+        if rfe_scale_fit_obj is not None:
+            # Extracting accumulate columns
+            accumulate_cols = self._extract_list(rfe_df.columns, rfe_scale_col)
+            # Scaling on rfe dataset
+            rfe_df = ScaleTransform(data=rfe_df,
+                                    object=rfe_scale_fit_obj,
+                                    accumulate=accumulate_cols).result
+            # Displaying scaled dataset
+            self._display_msg(msg="\nUpdated dataset after performing scaling on RFE selected features :",
+                              data=rfe_df,
+                              progress_bar=self.progress_bar)
 
         # Uploading rfe dataset to table for further use
         table_name = UtilFuncs._generate_temp_table_name(prefix="rfe_new_test",
                                                          table_type = TeradataConstants.TERADATA_TABLE)
+        # If configure.temp_object_type="VT", _generate_temp_table_name() retruns the
+        # table name in fully qualified format.
+        table_name = UtilFuncs._extract_table_name(table_name)
         # Storing table name mapping for rfe dataset
         self.table_name_mapping[self.data_node_id]["rfe_new_test"] = table_name
-        copy_to_sql(df = rfe_df, table_name= table_name, if_exists="replace")
+        # In the case of the VT option, the table was being persisted, so the VT condition is being checked.
+        is_temporary = configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE
+        copy_to_sql(df = rfe_df, table_name= table_name, if_exists="replace", temporary=is_temporary)
 
     def _feature_selection_pca_transformation(self):
         """
@@ -758,17 +774,20 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
         pca_scale_col = self.data_transformation_params.get("pca_scale_col", None)
         # Extracting accumulate columns
         accumulate_cols = self._extract_list(self.data.columns, pca_scale_col)
-        # Scaling on pca dataset
-        pca_scaled_df = ScaleTransform(data=self.data,
-                                       object=pca_scale_fit_obj,
-                                       accumulate=accumulate_cols).result
-        # Displaying scaled dataset
-        self._display_msg(msg="\nUpdated dataset after performing scaling for PCA feature selection :",
-                          data=pca_scaled_df,
-                          progress_bar=self.progress_bar)
+
+        pca_scaled_df = self.data
+        if pca_scale_fit_obj is not None:
+            # Scaling on pca dataset
+            pca_scaled_df = ScaleTransform(data=self.data,
+                                           object=pca_scale_fit_obj,
+                                           accumulate=accumulate_cols).result
+            # Displaying scaled dataset
+            self._display_msg(msg="\nUpdated dataset after performing scaling for PCA feature selection :",
+                              data=pca_scaled_df,
+                              progress_bar=self.progress_bar)
 
         # Convert to pandas dataframe for applying pca
-        pca_scaled_pd = pca_scaled_df.to_pandas()
+        pca_scaled_pd = pca_scaled_df.to_pandas().reset_index()
         # Extracting pca fit instance for applying pca
         pca_fit_instance = self.data_transformation_params.get("pca_fit_instance", None)
         # Extracting columns for applying pca
@@ -804,6 +823,12 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
         # Uploading pca dataset to table for further use
         table_name = UtilFuncs._generate_temp_table_name(prefix="pca_new_test",
                                                          table_type = TeradataConstants.TERADATA_TABLE)
+        # If configure.temp_object_type="VT", _generate_temp_table_name() retruns the
+        # table name in fully qualified format.
+        table_name = UtilFuncs._extract_table_name(table_name)
         # Storing table name mapping for pca dataset
         self.table_name_mapping[self.data_node_id]["pca_new_test"] = table_name
-        copy_to_sql(df = pca_df, table_name=table_name, if_exists="replace")
+        # In the case of the VT option, the table was being persisted, so the VT condition is being checked.
+        is_temporary = configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE
+        copy_to_sql(df = pca_df, table_name=table_name, if_exists="replace", temporary=is_temporary)
+
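The three copy_to_sql() hunks above share one idea: when the new configure.temp_object_type option selects volatile tables ("VT", per the added comments), AutoML's intermediate lasso/rfe/pca tables are created as session-local volatile tables instead of being persisted. A minimal sketch of the resulting behavior, assuming a working create_context() connection; the table and column names are illustrative only:

    import pandas as pd
    from teradataml import create_context, copy_to_sql, configure

    create_context(host="<host>", username="<user>", password="<password>")

    # Session-wide switch the hunks compare against
    # TeradataConstants.TERADATA_VOLATILE_TABLE.
    configure.temp_object_type = "VT"

    # The same routing the hunks add: a volatile table lives only for this
    # session, so intermediate AutoML artifacts no longer persist.
    pdf = pd.DataFrame({"id": [1, 2, 3], "feat": [0.1, 0.2, 0.3]})
    copy_to_sql(df=pdf, table_name="lasso_intermediate", if_exists="replace",
                temporary=True)  # temporary=True creates a VOLATILE table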
teradataml/automl/feature_engineering.py

@@ -41,6 +41,8 @@ from teradataml.dataframe.sql_functions import case
 from teradataml.hyperparameter_tuner.utils import _ProgressBar
 from teradataml.utils.validators import _Validators
 from teradataml.common.utils import UtilFuncs
+from teradataml.common.constants import TeradataConstants
+from teradataml.options.configure import configure
 
 
 class _FeatureEngineering:
@@ -132,8 +134,9 @@ class _FeatureEngineering:
         self.data_transform_dict = {}
         self.one_hot_obj_count = 0
         self.is_classification_type = lambda: self.task_type.upper() == 'CLASSIFICATION'
-        self.volatile = kwargs.get('volatile', False)
         self.persist = kwargs.get('persist', False)
+        self.volatile = kwargs.get('volatile', False) or (configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE and self.persist is False)
+
 
     # Method for doing feature engineering on data -> adding id, removing futile col, imputation, encoding(one hot)
     def feature_engineering(self,
@@ -260,6 +263,11 @@ class _FeatureEngineering:
             Returns extracted elements in form of list.
 
         """
+        # Ensure list1 and list2 are lists, default to empty list if None
+        if list1 is None:
+            list1 = []
+        if list2 is None:
+            list2 = []
         new_lst = list(set(list1) - set(list2))
         return new_lst
 
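The guard added to _extract_list() matters because set(None) raises a TypeError, so the old set difference failed whenever a column list was never recorded (for example, when scaling was skipped). A standalone rendering of the guarded logic, with a hypothetical helper name:

    def extract_list(list1, list2):
        # Treat a missing list as "no columns", exactly what the guard above does.
        if list1 is None:
            list1 = []
        if list2 is None:
            list2 = []
        return list(set(list1) - set(list2))

    print(extract_list(["id", "x", "y"], None))  # ['id', 'x', 'y'] in some order
    print(extract_list(None, ["x"]))             # []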
@@ -348,12 +356,10 @@ class _FeatureEngineering:
 
         # Detecting and removing futile columns, if categorical_column exists
         if len(categorical_columns) != 0:
-
             obj = CategoricalSummary(data=self.data,
                                      target_columns=categorical_columns,
                                      volatile=self.volatile,
                                      persist=self.persist)
-
             gfc_out = GetFutileColumns(data=self.data,
                                        object=obj,
                                        category_summary_column="ColumnName",
@@ -1810,10 +1816,11 @@ class _FeatureEngineering:
         RETURNS:
             Tuple containing volatile and persist parameters.
         """
-        volatile = self.volatile
+        # Prioritizing persist argument and then volatile
         persist = self.persist
+        volatile = self.volatile or (configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE and persist is False)
         if self.custom_data is not None and self.custom_data.get(func_indicator, False):
            volatile = self.custom_data[param_name].get("volatile", False)
            persist = self.custom_data[param_name].get("persist", False)
 
-        return (volatile, persist)
+        return (volatile, persist)
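Both _FeatureEngineering hunks encode the same precedence rule: an explicit volatile=True is honored, the session-wide VT option upgrades outputs to volatile, and persist=True always wins over the VT option. As a standalone sketch (VOLATILE stands in for TeradataConstants.TERADATA_VOLATILE_TABLE):

    VOLATILE = "VT"  # stand-in for TeradataConstants.TERADATA_VOLATILE_TABLE

    def resolve_volatile(volatile, persist, temp_object_type):
        # persist wins; otherwise the VT option makes the output volatile.
        return volatile or (temp_object_type == VOLATILE and persist is False)

    assert resolve_volatile(False, False, "VT") is True    # VT option applies
    assert resolve_volatile(False, True, "VT") is False    # persist overrides VT
    assert resolve_volatile(True, False, None) is True     # explicit volatile kept
    assert resolve_volatile(False, False, None) is False   # default: permanent table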
teradataml/automl/model_training.py

@@ -26,10 +26,10 @@ from teradataml.context import context as tdmlctx
 from teradataml.dataframe.copy_to import copy_to_sql
 from teradataml.dataframe.dataframe import DataFrame
 from teradataml import execute_sql, get_connection
-from teradataml import SVM, GLM, DecisionForest, XGBoost, GridSearch, KNN, RandomSearch
+from teradataml import configure, SVM, GLM, DecisionForest, XGBoost, GridSearch, KNN, RandomSearch
 from teradataml.utils.validators import _Validators
 from teradataml.common.utils import UtilFuncs
-
+from teradataml.common.constants import TeradataConstants
 
 class _ModelTraining:
 
@@ -114,6 +114,12 @@ class _ModelTraining:
                 session.
                 Default Value: False
                 Types: bool
+
+            seed:
+                Optional Argument.
+                Specifies the random seed for reproducibility.
+                Default Value: 42
+                Types: int
         """
         self.data = data
         self.target_column = target_column
@@ -126,6 +132,7 @@ class _ModelTraining:
         self.startify_col = None
         self.persist = kwargs.get("persist", False)
         self.volatile = kwargs.get("volatile", False)
+        self.seed = kwargs.get("seed", 42)
 
     def model_training(self,
                        auto=True,
@@ -499,7 +506,7 @@ class _ModelTraining:
             'max_depth': tuple(max_depth),
             'min_node_size': tuple(min_node_size),
             'iter_num': tuple(iter_num),
-            'seed':42
+            'seed':self.seed
         }
         # Hyperparameters for Decision Forest model
         df_params = {
@@ -510,7 +517,7 @@ class _ModelTraining:
             'max_depth': tuple(max_depth),
             'min_node_size': tuple(min_node_size),
             'num_trees': tuple(num_trees),
-            'seed':42
+            'seed':self.seed
         }
 
         # Updating model type in case of classification
@@ -874,16 +881,30 @@ class _ModelTraining:
             verbose = 0
 
         # Hyperparameter tunning
+        # Parallel run opens multiple connections for parallel execution,
+        # but volatile tables are not accessible across different sessions.
+        # Therefore, execution is performed sequentially by setting run_parallel=False.
+
+        run_parallel = configure.temp_object_type != TeradataConstants.TERADATA_VOLATILE_TABLE
+
+        common_params = {
+            "data": train_data,
+            "evaluation_metric": self.stopping_metric,
+            "early_stop": self.stopping_tolerance,
+            "run_parallel": run_parallel,
+            "sample_seed": self.seed,
+            "sample_id_column": "id",
+            "discard_invalid_column_params": True,
+            "stratify_column": self.startify_col,
+            "verbose": verbose,
+            "max_time": self.max_runtime_secs,
+            "suppress_refer_msg": True
+        }
+
         if model_param['name'] == 'knn':
-            _obj.fit(data=train_data, evaluation_metric=self.stopping_metric,
-                     early_stop=self.stopping_tolerance, run_parallel=True,
-                     sample_seed=42, sample_id_column='id', discard_invalid_column_params=True,
-                     stratify_column=self.startify_col,verbose=verbose, max_time=self.max_runtime_secs)
+            _obj.fit(**common_params)
         else:
-            _obj.fit(data=train_data, evaluation_metric=self.stopping_metric,
-                     early_stop=self.stopping_tolerance, **eval_params,
-                     run_parallel=True, discard_invalid_column_params=True, sample_seed=42,
-                     sample_id_column='id',stratify_column=self.startify_col, verbose=verbose, max_time=self.max_runtime_secs)
+            _obj.fit(**common_params, **eval_params)
 
         # Getting all passed models
         model_info = _obj.model_stats.merge(_obj.models[_obj.models['STATUS']=='PASS'][['MODEL_ID', 'DATA_ID', 'PARAMETERS']],
@@ -992,4 +1013,4 @@ class _ModelTraining:
         elif ml_name == 'glm':
             eval_params['family'] = 'GAUSSIAN'
 
-        return eval_params
+        return eval_params
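Two threads run through the _ModelTraining hunks: the hard-coded 42 becomes a caller-supplied seed, and the duplicated fit() calls collapse into one shared kwargs dict, with run_parallel forced off under the VT option because volatile tables are invisible to the extra sessions a parallel run opens. The dict-unpacking pattern in miniature, with placeholder values:

    # Shared keyword arguments built once; branch-specific ones unpacked alongside.
    seed = 42                    # was a literal 42, now self.seed
    run_parallel = False         # forced sequential when temp_object_type is "VT"

    common_params = {
        "run_parallel": run_parallel,
        "sample_seed": seed,
        "sample_id_column": "id",
    }
    eval_params = {"family": "GAUSSIAN"}  # extra kwargs for non-KNN models

    def fit(**kwargs):                    # stand-in for GridSearch/RandomSearch.fit
        print(sorted(kwargs))

    fit(**common_params)                  # the knn branch
    fit(**common_params, **eval_params)   # every other model type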
teradataml/common/__init__.py

@@ -1,2 +1 @@
-from teradataml.common.formula import as_categorical
-from teradataml.common.constants import Action, Permission
+from teradataml.common.formula import as_categorical
teradataml/common/constants.py

@@ -14,7 +14,6 @@ A class for holding all constants
 import re
 import sqlalchemy
 from enum import Enum
-from teradataml.options.configure import configure
 from teradatasqlalchemy.types import (INTEGER, SMALLINT, BIGINT, BYTEINT, DECIMAL, FLOAT, NUMBER, VARCHAR)
 from teradatasqlalchemy.types import (DATE, TIME, TIMESTAMP)
 from teradatasqlalchemy.types import (BYTE, VARBYTE, BLOB)
@@ -62,6 +61,8 @@ class SQLConstants(Enum):
     SQL_DELETE_SPECIFIC_ROW = 30
     SQL_EXEC_STORED_PROCEDURE = 31
     SQL_SELECT_COLUMNNAMES_WITH_WHERE = 32
+    SQL_HELP_DATABASE = 33
+    SQL_HELP_DATALAKE = 34
     CONSTRAINT = ["check_constraint", "primary_key_constraint",
                   "foreign_key_constraint", "unique_key_constraint"]
 
@@ -485,11 +486,18 @@ class TableOperatorConstants(Enum):
 
     # Check if Python interpretor and add-ons are installed or not.
     # Location of In-DB packages is indicated by configure.indb_install_location.
+    # Check for both python and pip versions.
     CHECK_PYTHON_INSTALLED = """SELECT distinct * FROM SCRIPT(
                                 ON (select 1) PARTITION BY ANY
-                                SCRIPT_COMMAND('{}/bin/pip3 --version')
-                                returns('package VARCHAR(256)'))
+                                SCRIPT_COMMAND('echo $({0}/bin/pip3 --version) -- $({0}/bin/python3 --version)')
+                                returns('pip VARCHAR(256)'))
                              """
+    # Check which version of rpms are installed.
+    INDB_PYTHON_PATH = """SEL DISTINCT os_ver
+                          FROM SCRIPT(
+                          SCRIPT_COMMAND('grep CPE_NAME /etc/os-release')
+                          RETURNS('os_ver VARCHAR(100)')
+                          );"""
 
     # Script Query to get Python packages and corresponding versions.
     # Location of In-DB packages is indicated by configure.indb_install_location.
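The reworked CHECK_PYTHON_INSTALLED query folds both interpreter checks into one SCRIPT row: echo prints the pip3 and python3 --version outputs side by side, which is what lets the client raise the new PYTHON_VERSION_MISMATCH error when the Vantage Python differs from the local one. A sketch of running the check by hand, assuming configure.indb_install_location points at the in-DB installation:

    from teradataml import execute_sql, configure

    query = """SELECT DISTINCT * FROM SCRIPT(
                   ON (SELECT 1) PARTITION BY ANY
                   SCRIPT_COMMAND('echo $({0}/bin/pip3 --version) -- $({0}/bin/python3 --version)')
                   RETURNS('pip VARCHAR(256)'))""".format(configure.indb_install_location)

    # One combined row such as "pip 21.x from ... -- Python 3.8.x" instead of
    # the old pip-only output; split on "--" to compare versions client-side.
    row = execute_sql(query).fetchall()[0]
    print(row[0])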
@@ -802,7 +810,8 @@ class ValibConstants(Enum):
         "subdivision_method": "subdivisionmethod",
         "subdivision_threshold": "subdivisionthreshold",
         "filter": "where",
-        "gen_sql_only": "gensqlonly"
+        "gen_sql_only": "gensqlonly",
+        "charset": "charset"
     },
 
     "DATAEXPLORER": {
@@ -819,7 +828,8 @@ class ValibConstants(Enum):
         "stats_options": "statsoptions",
         "distinct": "uniques",
         "filter": "where",
-        "gen_sql": "gensql"
+        "gen_sql": "gensql",
+        "charset": "charset"
     },
 
     "FREQUENCY": {
@@ -833,7 +843,8 @@ class ValibConstants(Enum):
         "style": "style",
         "top_n": "topvalues",
         "filter": "where",
-        "gen_sql_only": "gensqlonly"
+        "gen_sql_only": "gensqlonly",
+        "charset": "charset"
     },
 
     "HISTOGRAM": {
@@ -848,7 +859,8 @@ class ValibConstants(Enum):
         "stats_columns": "statisticscolumns",
         "hist_style": "style",
         "filter": "where",
-        "gen_sql_only": "gensqlonly"
+        "gen_sql_only": "gensqlonly",
+        "charset": "charset"
     },
 
     "STATISTICS": {
@@ -859,7 +871,8 @@ class ValibConstants(Enum):
         "statistical_method": "statisticalmethod",
         "stats_options": "statsoptions",
         "filter": "where",
-        "gen_sql_only": "gensqlonly"
+        "gen_sql_only": "gensqlonly",
+        "charset": "charset"
     },
 
     "TEXTFIELDANALYZER": {
@@ -867,7 +880,8 @@ class ValibConstants(Enum):
         "exclude_columns": "columnstoexclude",
         "analyze_numerics": "extendednumericanalysis",
         "analyze_unicode": "extendedunicodeanalysis",
-        "gen_sql_only": "gensqlonly"
+        "gen_sql_only": "gensqlonly",
+        "charset": "charset"
     },
 
     "VALUES": {
@@ -876,7 +890,8 @@ class ValibConstants(Enum):
         "group_columns": "groupby",
         "distinct": "uniques",
         "filter": "where",
-        "gen_sql_only": "gensqlonly"
+        "gen_sql_only": "gensqlonly",
+        "charset": "charset"
     },
 
     "ASSOCIATION": {
@@ -901,7 +916,8 @@ class ValibConstants(Enum):
         "filter": "where",
         "no_support_results": "dropsupporttables",
         "support_result_prefix": "resulttableprefix",
-        "gen_sql_only": "gensqlonly"
+        "gen_sql_only": "gensqlonly",
+        "charset": "charset"
     },
 
     "KMEANS": {
@@ -911,7 +927,8 @@ class ValibConstants(Enum):
         "continuation": "continuation",
         "max_iter": "iterations",
         "operator_database": "operatordatabase",
-        "threshold": "threshold"
+        "threshold": "threshold",
+        "charset": "charset"
     },
 
     "KMEANSSCORE": {
@@ -919,7 +936,8 @@ class ValibConstants(Enum):
         "cluster_column": "clustername",
         "fallback": "fallback",
         "operator_database": "operatordatabase",
-        "accumulate": "retain"
+        "accumulate": "retain",
+        "charset": "charset"
     },
 
     "DECISIONTREE": {
@@ -931,7 +949,8 @@ class ValibConstants(Enum):
         "max_depth": "max_depth",
         "num_splits": "min_records",
         "operator_database": "operatordatabase",
-        "pruning": "pruning"
+        "pruning": "pruning",
+        "charset": "charset"
     },
 
     "DECISIONTREESCORE": {
@@ -941,7 +960,8 @@ class ValibConstants(Enum):
         "profile": "profiletables",
         "accumulate": "retain",
         "targeted_value": "targetedvalue",
-        "gen_sql_only": "gensqlonly"
+        "gen_sql_only": "gensqlonly",
+        "charset": "charset"
     },
 
     "MATRIX": {
@@ -951,7 +971,8 @@ class ValibConstants(Enum):
         "matrix_output": "matrixoutput",
         "type": "matrixtype",
         "handle_nulls": "nullhandling",
-        "filter": "where"
+        "filter": "where",
+        "charset": "charset"
     },
 
     "LINEAR": {
@@ -973,7 +994,8 @@ class ValibConstants(Enum):
         "stepwise": "stepwise",
         "use_fstat": "usefstat",
         "use_pvalue": "usepvalue",
-        "variance_prop_threshold": "varianceproportionthreshold"
+        "variance_prop_threshold": "varianceproportionthreshold",
+        "charset": "charset"
     },
 
     "LINEARSCORE": {
@@ -981,7 +1003,8 @@ class ValibConstants(Enum):
         "response_column": "predicted",
         "residual_column": "residual",
         "accumulate": "retain",
-        "gen_sql_only": "gensqlonly"
+        "gen_sql_only": "gensqlonly",
+        "charset": "charset"
     },
 
     "LOGISTIC": {
@@ -1011,7 +1034,8 @@ class ValibConstants(Enum):
         "end_threshold": "thresholdend",
         "increment_threshold": "thresholdincrement",
         "threshold_output": "thresholdtable",
-        "variance_prop_threshold": "varianceproportionthreshold"
+        "variance_prop_threshold": "varianceproportionthreshold",
+        "charset": "charset"
     },
 
     "LOGISTICSCORE": {
@@ -1023,7 +1047,8 @@ class ValibConstants(Enum):
         "start_threshold": "thresholdbegin",
         "end_threshold": "thresholdend",
         "increment_threshold": "thresholdincrement",
-        "gen_sql_only": "gensqlonly"
+        "gen_sql_only": "gensqlonly",
+        "charset": "charset"
 
         # The following 3 arguments three should not be present for LogRegPredict function
         # where as when the function is LogRegEvaluator, at least one of these should be
@@ -1051,13 +1076,15 @@ class ValibConstants(Enum):
         "rotation_type": "rotationtype",
         "load_threshold": "thresholdloading",
         "percent_threshold": "thresholdpercent",
-        "variance_prop_threshold": "varianceproportionthreshold"
+        "variance_prop_threshold": "varianceproportionthreshold",
+        "charset": "charset"
     },
 
     "FACTORSCORE": {
         "index_columns": "index",
         "accumulate": "retain",
-        "gen_sql_only": "gensqlonly"
+        "gen_sql_only": "gensqlonly",
+        "charset": "charset"
     },
 
     "PARAMETRICTEST": {
@@ -1076,7 +1103,8 @@ class ValibConstants(Enum):
         "style": "teststyle",
         "probability_threshold": "thresholdprobability",
         "with_indicator": "withindicator",
-        "gen_sql_only": "gensqlonly"
+        "gen_sql_only": "gensqlonly",
+        "charset": "charset"
     },
 
     "BINOMIALTEST": {
@@ -1091,7 +1119,8 @@ class ValibConstants(Enum):
         "stats_database": "statsdatabase",
         "style": "teststyle",
         "probability_threshold": "thresholdprobability",
-        "gen_sql_only": "gensqlonly"
+        "gen_sql_only": "gensqlonly",
+        "charset": "charset"
     },
 
     "KSTEST": {
@@ -1103,7 +1132,8 @@ class ValibConstants(Enum):
         "stats_database": "statsdatabase",
         "style": "teststyle",
         "probability_threshold": "thresholdprobability",
-        "gen_sql_only": "gensqlonly"
+        "gen_sql_only": "gensqlonly",
+        "charset": "charset"
     },
 
     "CHISQUARETEST": {
@@ -1117,7 +1147,8 @@ class ValibConstants(Enum):
         "stats_database": "statsdatabase",
         "style": "teststyle",
         "probability_threshold": "thresholdprobability",
-        "gen_sql_only": "gensqlonly"
+        "gen_sql_only": "gensqlonly",
+        "charset": "charset"
     },
 
     "RANKTEST": {
@@ -1136,7 +1167,8 @@ class ValibConstants(Enum):
         "style": "teststyle",
         "probability_threshold": "thresholdprobability",
         "treatment_column": "treatmentcolumn",
-        "gen_sql_only": "gensqlonly"
+        "gen_sql_only": "gensqlonly",
+        "charset": "charset"
     },
 
     "VARTRAN": {
@@ -1147,13 +1179,15 @@ class ValibConstants(Enum):
         "allow_duplicates": "multiset",
         "nopi": "noindex",
         "filter": "whereclause",
-        "gen_sql_only": "gensqlonly"
+        "gen_sql_only": "gensqlonly",
+        "charset": "charset"
     },
 
     "REPORT": {
         "analysis_type": "analysistype",
         "filter": "where",
-        "gen_sql_only": "gensqlonly"
+        "gen_sql_only": "gensqlonly",
+        "charset": "charset"
     }
     }
 
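Every VALIB argument map above gains the same "charset": "charset" passthrough, so one new keyword becomes available uniformly across the valib entry points. A sketch of how it would surface, assuming the keyword is spelled charset on the Python side (as the mapping indicates) and that VAL accepts a value such as "UTF8" for it; the table and column names are illustrative:

    from teradataml import create_context, DataFrame, configure, valib

    create_context(host="<host>", username="<user>", password="<password>")
    configure.val_install_location = "VAL"   # database holding the VAL procedures

    df = DataFrame("customer_table")         # hypothetical source table
    # charset flows through the new mapping straight to the VAL stored procedure.
    freq = valib.Frequency(data=df, columns="marital_status", charset="UTF8")
    print(freq.result)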
@@ -1448,6 +1482,7 @@ class HTTPRequest(Enum):
     POST = "post"
     PUT = "put"
     DELETE = "delete"
+    PATCH = "patch"
 
 
 class AsyncStatusColumns(Enum):
@@ -1497,14 +1532,3 @@ class SessionParamsPythonNames:
     DATABASE = "Current DataBase"
     DATEFORM = 'Current DateForm'
 
-
-    class Action(Enum):
-        # Holds variable names for the type of grant to be provided.
-        GRANT = "GRANT"
-        REVOKE = "REVOKE"
-
-    class Permission(Enum):
-        # Holds variable names for the type of permission to be provided.
-        READ = "READ"
-        WRITE = "WRITE"
-
teradataml/common/messagecodes.py

@@ -164,7 +164,7 @@ class ErrorInfoCodes(Enum):
     AED_SETOP_INPUT_TABLE_COLUMNS_COUNT_MISMATCH = 'TDML_2111'
     AED_SHOW_QUERY_MULTIPLE_OPTIONS = 'TDML_2112'
 
-    # Table Operator Error Codes starting from 2300 - Reserved till 2313
+    # Table Operator Error Codes starting from 2300 - Reserved till 2314
     INPUT_FILE_NOT_FOUND = 'TDML_2300'
     REMOVE_FILE_FAILED = 'TDML_2301'
     INSTALL_FILE_FAILED = 'TDML_2302'
@@ -175,6 +175,8 @@ class ErrorInfoCodes(Enum):
     NOT_ALLOWED_VALUES = 'TDML_2307'
     ARGUMENT_VALUE_SAME = 'TDML_2308'
     PYTHON_NOT_INSTALLED = 'TDML_2309'
+    PYTHON_VERSION_MISMATCH = 'TDML_2310'
+    PYTHON_VERSION_MISMATCH_OAF = 'TDML_2416'
     EMPTY_FILE = 'TDML_2311'
     ARG_NONE = 'TDML_2312'
     EITHER_FUNCTION_OR_ARGS = 'TDML_2313'
@@ -223,7 +225,7 @@ class ErrorInfoCodes(Enum):
     TARGET_COL_NOT_FOUND_FOR_EVALUATE = 'TDML_2541'
 
     # OpenAF Error codes starting from 2551 - Reserved till 2560.
-    AUTH_TOKEN_REQUIRED = 'TDML_2551'
+    SET_REQUIRED_PARAMS = 'TDML_2551'
 
 class MessageCodes(Enum):
     """
@@ -396,6 +398,10 @@ class MessageCodes(Enum):
 
     PYTHON_NOT_INSTALLED = "Python is not installed on Vantage. " \
                            "Please install Python interpreter and add-on packages on Vantage."
+    PYTHON_VERSION_MISMATCH = "Python version on Vantage is not same as that of local environment. " \
+                              "Use the same Python version '{}' or '{}.x' in local environment."
+    PYTHON_VERSION_MISMATCH_OAF = "Python version of Lake user environment '{}' is not same as that of local environment '{}'. " \
+                                  "Maintain similar version of Python between Lake user environment and local environment."
     IMPORT_PYTHON_PACKAGE = "Module '{}' not found. Install '{}' before running {}()."
     INT_ARGUMENT_COMPARISON = "Argument '{}' must be {} to argument '{}'"
     EXECUTION_FAILED = "Failed to {}. {}"
@@ -430,4 +436,8 @@ class MessageCodes(Enum):
     PATH_NOT_FOUND = "Specified local path '{}' not found. Please check the path."
     TARGET_COL_NOT_FOUND_FOR_EVALUATE = "Target column '{}' not found in the passed dataFrame. "\
                                         "evaluate() requires target column to be present in the dataFrame."
-    AUTH_TOKEN_REQUIRED = "Authentication token is required to run '{}'. Set the token using set_auth_token()."
+    SET_REQUIRED_PARAMS = "{} is required to run '{}'. Set it using {}()."
+    CONNECTION_PARAMS = "Required connection parameters are missing. Connection parameters should either be " \
+                        "explicitly passed to function or specified using a configuration file, or setting up " \
+                        "the environment variables."
+