teradataml 20.0.0.3__py3-none-any.whl → 20.0.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +119 -0
- teradataml/_version.py +1 -1
- teradataml/analytics/analytic_function_executor.py +18 -6
- teradataml/analytics/byom/__init__.py +1 -1
- teradataml/analytics/sqle/__init__.py +4 -1
- teradataml/analytics/valib.py +18 -4
- teradataml/automl/__init__.py +51 -6
- teradataml/automl/data_preparation.py +56 -33
- teradataml/automl/data_transformation.py +58 -33
- teradataml/automl/feature_engineering.py +12 -5
- teradataml/automl/model_training.py +34 -13
- teradataml/common/__init__.py +1 -2
- teradataml/common/constants.py +64 -40
- teradataml/common/messagecodes.py +13 -3
- teradataml/common/messages.py +4 -1
- teradataml/common/sqlbundle.py +40 -10
- teradataml/common/utils.py +113 -39
- teradataml/common/warnings.py +11 -0
- teradataml/context/context.py +141 -17
- teradataml/data/amazon_reviews_25.csv +26 -0
- teradataml/data/byom_example.json +11 -0
- teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
- teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
- teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
- teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
- teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
- teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/Shap.py +28 -6
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
- teradataml/data/hnsw_alter_data.csv +5 -0
- teradataml/data/hnsw_data.csv +10 -0
- teradataml/data/jsons/byom/h2opredict.json +1 -1
- teradataml/data/jsons/byom/onnxembeddings.json +266 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
- teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +2 -2
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +1 -1
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +5 -5
- teradataml/data/teradataml_example.json +8 -0
- teradataml/data/vectordistance_example.json +1 -1
- teradataml/dataframe/copy_to.py +8 -3
- teradataml/dataframe/data_transfer.py +11 -1
- teradataml/dataframe/dataframe.py +517 -121
- teradataml/dataframe/dataframe_utils.py +152 -20
- teradataml/dataframe/functions.py +26 -11
- teradataml/dataframe/setop.py +11 -6
- teradataml/dataframe/sql.py +2 -2
- teradataml/dbutils/dbutils.py +525 -129
- teradataml/hyperparameter_tuner/optimizer.py +12 -1
- teradataml/opensource/{sklearn/_sklearn_wrapper.py → _base.py} +317 -1011
- teradataml/opensource/_class.py +141 -17
- teradataml/opensource/{constants.py → _constants.py} +7 -3
- teradataml/opensource/_lightgbm.py +52 -53
- teradataml/opensource/_sklearn.py +1008 -0
- teradataml/opensource/_wrapper_utils.py +5 -5
- teradataml/options/__init__.py +47 -15
- teradataml/options/configure.py +103 -25
- teradataml/options/display.py +13 -2
- teradataml/plot/axis.py +47 -8
- teradataml/plot/figure.py +33 -0
- teradataml/plot/plot.py +63 -13
- teradataml/scriptmgmt/UserEnv.py +2 -2
- teradataml/scriptmgmt/lls_utils.py +63 -26
- teradataml/store/__init__.py +1 -2
- teradataml/store/feature_store/feature_store.py +102 -7
- teradataml/table_operators/Apply.py +32 -18
- teradataml/table_operators/Script.py +3 -1
- teradataml/table_operators/TableOperator.py +3 -1
- teradataml/utils/dtypes.py +47 -0
- teradataml/utils/internal_buffer.py +18 -0
- teradataml/utils/validators.py +68 -9
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.4.dist-info}/METADATA +123 -2
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.4.dist-info}/RECORD +79 -75
- teradataml/data/SQL_Fundamentals.pdf +0 -0
- teradataml/libaed_0_1.dylib +0 -0
- teradataml/libaed_0_1.so +0 -0
- teradataml/opensource/sklearn/__init__.py +0 -0
- teradataml/store/vector_store/__init__.py +0 -1586
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.4.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.4.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.4.dist-info}/zip-safe +0 -0
|
@@ -31,8 +31,11 @@ from teradataml import ScaleTransform
|
|
|
31
31
|
from teradataml import SimpleImputeTransform
|
|
32
32
|
from teradataml import TargetEncodingTransform
|
|
33
33
|
from teradataml import Transform, UtilFuncs, TeradataConstants
|
|
34
|
+
from teradataml import execute_sql
|
|
34
35
|
from teradataml.common.garbagecollector import GarbageCollector
|
|
35
36
|
from teradataml.hyperparameter_tuner.utils import _ProgressBar
|
|
37
|
+
from teradataml.options.configure import configure
|
|
38
|
+
from teradataml.common.constants import TeradataConstants
|
|
36
39
|
|
|
37
40
|
# AutoML Internal libraries
|
|
38
41
|
from teradataml.automl.feature_exploration import _FeatureExplore
|
|
@@ -219,11 +222,11 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
|
|
|
219
222
|
DESCRIPTION:
|
|
220
223
|
Function drops irrelevant columns and adds id column.
|
|
221
224
|
"""
|
|
222
|
-
# Extracting
|
|
225
|
+
# Extracting irrelevant column list
|
|
223
226
|
columns_to_be_removed = self.data_transformation_params.get("drop_irrelevent_columns", None)
|
|
224
227
|
if columns_to_be_removed:
|
|
225
228
|
self.data = self.data.drop(columns_to_be_removed, axis=1)
|
|
226
|
-
self._display_msg(msg="\nUpdated dataset after dropping
|
|
229
|
+
self._display_msg(msg="\nUpdated dataset after dropping irrelevant columns :",
|
|
227
230
|
data=self.data,
|
|
228
231
|
progress_bar=self.progress_bar)
|
|
229
232
|
|
|
@@ -693,22 +696,28 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
|
|
|
693
696
|
lasso_scale_fit_obj = self.data_transformation_params.get("lasso_scale_fit_obj", None)
|
|
694
697
|
lasso_scale_col = self.data_transformation_params.get("lasso_scale_col", None)
|
|
695
698
|
# Extracting accumulate columns
|
|
696
|
-
|
|
697
|
-
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
702
|
-
|
|
703
|
-
|
|
704
|
-
|
|
699
|
+
if lasso_scale_fit_obj is not None:
|
|
700
|
+
accumulate_cols = self._extract_list(lasso_df.columns, lasso_scale_col)
|
|
701
|
+
# Scaling dataset
|
|
702
|
+
lasso_df = ScaleTransform(data=lasso_df,
|
|
703
|
+
object=lasso_scale_fit_obj,
|
|
704
|
+
accumulate=accumulate_cols).result
|
|
705
|
+
# Displaying scaled dataset
|
|
706
|
+
self._display_msg(msg="\nUpdated dataset after performing scaling on Lasso selected features :",
|
|
707
|
+
data=lasso_df,
|
|
708
|
+
progress_bar=self.progress_bar)
|
|
705
709
|
|
|
706
710
|
# Uploading lasso dataset to table for further use
|
|
707
711
|
table_name = UtilFuncs._generate_temp_table_name(prefix="lasso_new_test",
|
|
708
712
|
table_type = TeradataConstants.TERADATA_TABLE)
|
|
713
|
+
# If configure.temp_object_type="VT", _generate_temp_table_name() returns the
|
|
714
|
+
# table name in fully qualified format.
|
|
715
|
+
table_name = UtilFuncs._extract_table_name(table_name)
|
|
709
716
|
# Storing table name mapping for lasso dataset
|
|
710
717
|
self.table_name_mapping[self.data_node_id]["lasso_new_test"] = table_name
|
|
711
|
-
|
|
718
|
+
# In the case of the VT option, the table was being persisted, so the VT condition is being checked.
|
|
719
|
+
is_temporary = configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE
|
|
720
|
+
copy_to_sql(df = lasso_df, table_name= table_name, if_exists="replace", temporary=is_temporary)
|
|
712
721
|
|
|
713
722
|
def _feature_selection_rfe_transformation(self):
|
|
714
723
|
"""
|
|
@@ -730,23 +739,30 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
|
|
|
730
739
|
# Extracting fit object and columns for scaling
|
|
731
740
|
rfe_scale_fit_obj = self.data_transformation_params.get("rfe_scale_fit_obj", None)
|
|
732
741
|
rfe_scale_col = self.data_transformation_params.get("rfe_scale_col", None)
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
738
|
-
|
|
739
|
-
|
|
740
|
-
|
|
741
|
-
|
|
742
|
-
|
|
742
|
+
|
|
743
|
+
if rfe_scale_fit_obj is not None:
|
|
744
|
+
# Extracting accumulate columns
|
|
745
|
+
accumulate_cols = self._extract_list(rfe_df.columns, rfe_scale_col)
|
|
746
|
+
# Scaling on rfe dataset
|
|
747
|
+
rfe_df = ScaleTransform(data=rfe_df,
|
|
748
|
+
object=rfe_scale_fit_obj,
|
|
749
|
+
accumulate=accumulate_cols).result
|
|
750
|
+
# Displaying scaled dataset
|
|
751
|
+
self._display_msg(msg="\nUpdated dataset after performing scaling on RFE selected features :",
|
|
752
|
+
data=rfe_df,
|
|
753
|
+
progress_bar=self.progress_bar)
|
|
743
754
|
|
|
744
755
|
# Uploading rfe dataset to table for further use
|
|
745
756
|
table_name = UtilFuncs._generate_temp_table_name(prefix="rfe_new_test",
|
|
746
757
|
table_type = TeradataConstants.TERADATA_TABLE)
|
|
758
|
+
# If configure.temp_object_type="VT", _generate_temp_table_name() returns the
|
|
759
|
+
# table name in fully qualified format.
|
|
760
|
+
table_name = UtilFuncs._extract_table_name(table_name)
|
|
747
761
|
# Storing table name mapping for rfe dataset
|
|
748
762
|
self.table_name_mapping[self.data_node_id]["rfe_new_test"] = table_name
|
|
749
|
-
|
|
763
|
+
# In the case of the VT option, the table was being persisted, so the VT condition is being checked.
|
|
764
|
+
is_temporary = configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE
|
|
765
|
+
copy_to_sql(df = rfe_df, table_name= table_name, if_exists="replace", temporary=is_temporary)
|
|
750
766
|
|
|
751
767
|
def _feature_selection_pca_transformation(self):
|
|
752
768
|
"""
|
|
@@ -758,17 +774,20 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
|
|
|
758
774
|
pca_scale_col = self.data_transformation_params.get("pca_scale_col", None)
|
|
759
775
|
# Extracting accumulate columns
|
|
760
776
|
accumulate_cols = self._extract_list(self.data.columns, pca_scale_col)
|
|
761
|
-
|
|
762
|
-
pca_scaled_df =
|
|
763
|
-
|
|
764
|
-
|
|
765
|
-
|
|
766
|
-
|
|
767
|
-
|
|
768
|
-
|
|
777
|
+
|
|
778
|
+
pca_scaled_df = self.data
|
|
779
|
+
if pca_scale_fit_obj is not None:
|
|
780
|
+
# Scaling on pca dataset
|
|
781
|
+
pca_scaled_df = ScaleTransform(data=self.data,
|
|
782
|
+
object=pca_scale_fit_obj,
|
|
783
|
+
accumulate=accumulate_cols).result
|
|
784
|
+
# Displaying scaled dataset
|
|
785
|
+
self._display_msg(msg="\nUpdated dataset after performing scaling for PCA feature selection :",
|
|
786
|
+
data=pca_scaled_df,
|
|
787
|
+
progress_bar=self.progress_bar)
|
|
769
788
|
|
|
770
789
|
# Convert to pandas dataframe for applying pca
|
|
771
|
-
pca_scaled_pd = pca_scaled_df.to_pandas()
|
|
790
|
+
pca_scaled_pd = pca_scaled_df.to_pandas().reset_index()
|
|
772
791
|
# Extracting pca fit instance for applying pca
|
|
773
792
|
pca_fit_instance = self.data_transformation_params.get("pca_fit_instance", None)
|
|
774
793
|
# Extracting columns for applying pca
|
|
@@ -804,6 +823,12 @@ class _DataTransformation(_FeatureExplore, _FeatureEngineering):
|
|
|
804
823
|
# Uploading pca dataset to table for further use
|
|
805
824
|
table_name = UtilFuncs._generate_temp_table_name(prefix="pca_new_test",
|
|
806
825
|
table_type = TeradataConstants.TERADATA_TABLE)
|
|
826
|
+
# If configure.temp_object_type="VT", _generate_temp_table_name() returns the
|
|
827
|
+
# table name in fully qualified format.
|
|
828
|
+
table_name = UtilFuncs._extract_table_name(table_name)
|
|
807
829
|
# Storing table name mapping for pca dataset
|
|
808
830
|
self.table_name_mapping[self.data_node_id]["pca_new_test"] = table_name
|
|
809
|
-
|
|
831
|
+
# In the case of the VT option, the table was being persisted, so the VT condition is being checked.
|
|
832
|
+
is_temporary = configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE
|
|
833
|
+
copy_to_sql(df = pca_df, table_name=table_name, if_exists="replace", temporary=is_temporary)
|
|
834
|
+
|
|
@@ -41,6 +41,8 @@ from teradataml.dataframe.sql_functions import case
|
|
|
41
41
|
from teradataml.hyperparameter_tuner.utils import _ProgressBar
|
|
42
42
|
from teradataml.utils.validators import _Validators
|
|
43
43
|
from teradataml.common.utils import UtilFuncs
|
|
44
|
+
from teradataml.common.constants import TeradataConstants
|
|
45
|
+
from teradataml.options.configure import configure
|
|
44
46
|
|
|
45
47
|
|
|
46
48
|
class _FeatureEngineering:
|
|
@@ -132,8 +134,9 @@ class _FeatureEngineering:
|
|
|
132
134
|
self.data_transform_dict = {}
|
|
133
135
|
self.one_hot_obj_count = 0
|
|
134
136
|
self.is_classification_type = lambda: self.task_type.upper() == 'CLASSIFICATION'
|
|
135
|
-
self.volatile = kwargs.get('volatile', False)
|
|
136
137
|
self.persist = kwargs.get('persist', False)
|
|
138
|
+
self.volatile = kwargs.get('volatile', False) or (configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE and self.persist is False)
|
|
139
|
+
|
|
137
140
|
|
|
138
141
|
# Method for doing feature engineering on data -> adding id, removing futile col, imputation, encoding(one hot)
|
|
139
142
|
def feature_engineering(self,
|
|
@@ -260,6 +263,11 @@ class _FeatureEngineering:
|
|
|
260
263
|
Returns extracted elements in form of list.
|
|
261
264
|
|
|
262
265
|
"""
|
|
266
|
+
# Ensure list1 and list2 are lists, default to empty list if None
|
|
267
|
+
if list1 is None:
|
|
268
|
+
list1 = []
|
|
269
|
+
if list2 is None:
|
|
270
|
+
list2 = []
|
|
263
271
|
new_lst = list(set(list1) - set(list2))
|
|
264
272
|
return new_lst
|
|
265
273
|
|
|
@@ -348,12 +356,10 @@ class _FeatureEngineering:
|
|
|
348
356
|
|
|
349
357
|
# Detecting and removing futile columns, if categorical_column exists
|
|
350
358
|
if len(categorical_columns) != 0:
|
|
351
|
-
|
|
352
359
|
obj = CategoricalSummary(data=self.data,
|
|
353
360
|
target_columns=categorical_columns,
|
|
354
361
|
volatile=self.volatile,
|
|
355
362
|
persist=self.persist)
|
|
356
|
-
|
|
357
363
|
gfc_out = GetFutileColumns(data=self.data,
|
|
358
364
|
object=obj,
|
|
359
365
|
category_summary_column="ColumnName",
|
|
@@ -1810,10 +1816,11 @@ class _FeatureEngineering:
|
|
|
1810
1816
|
RETURNS:
|
|
1811
1817
|
Tuple containing volatile and persist parameters.
|
|
1812
1818
|
"""
|
|
1813
|
-
|
|
1819
|
+
# Prioritizing persist argument and then volatile
|
|
1814
1820
|
persist = self.persist
|
|
1821
|
+
volatile = self.volatile or (configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE and persist is False)
|
|
1815
1822
|
if self.custom_data is not None and self.custom_data.get(func_indicator, False):
|
|
1816
1823
|
volatile = self.custom_data[param_name].get("volatile", False)
|
|
1817
1824
|
persist = self.custom_data[param_name].get("persist", False)
|
|
1818
1825
|
|
|
1819
|
-
return (volatile, persist)
|
|
1826
|
+
return (volatile, persist)
|
|
@@ -26,10 +26,10 @@ from teradataml.context import context as tdmlctx
|
|
|
26
26
|
from teradataml.dataframe.copy_to import copy_to_sql
|
|
27
27
|
from teradataml.dataframe.dataframe import DataFrame
|
|
28
28
|
from teradataml import execute_sql, get_connection
|
|
29
|
-
from teradataml import SVM, GLM, DecisionForest, XGBoost, GridSearch, KNN, RandomSearch
|
|
29
|
+
from teradataml import configure, SVM, GLM, DecisionForest, XGBoost, GridSearch, KNN, RandomSearch
|
|
30
30
|
from teradataml.utils.validators import _Validators
|
|
31
31
|
from teradataml.common.utils import UtilFuncs
|
|
32
|
-
|
|
32
|
+
from teradataml.common.constants import TeradataConstants
|
|
33
33
|
|
|
34
34
|
class _ModelTraining:
|
|
35
35
|
|
|
@@ -114,6 +114,12 @@ class _ModelTraining:
|
|
|
114
114
|
session.
|
|
115
115
|
Default Value: False
|
|
116
116
|
Types: bool
|
|
117
|
+
|
|
118
|
+
seed:
|
|
119
|
+
Optional Argument.
|
|
120
|
+
Specifies the random seed for reproducibility.
|
|
121
|
+
Default Value: 42
|
|
122
|
+
Types: int
|
|
117
123
|
"""
|
|
118
124
|
self.data = data
|
|
119
125
|
self.target_column = target_column
|
|
@@ -126,6 +132,7 @@ class _ModelTraining:
|
|
|
126
132
|
self.startify_col = None
|
|
127
133
|
self.persist = kwargs.get("persist", False)
|
|
128
134
|
self.volatile = kwargs.get("volatile", False)
|
|
135
|
+
self.seed = kwargs.get("seed", 42)
|
|
129
136
|
|
|
130
137
|
def model_training(self,
|
|
131
138
|
auto=True,
|
|
@@ -499,7 +506,7 @@ class _ModelTraining:
|
|
|
499
506
|
'max_depth': tuple(max_depth),
|
|
500
507
|
'min_node_size': tuple(min_node_size),
|
|
501
508
|
'iter_num': tuple(iter_num),
|
|
502
|
-
'seed':
|
|
509
|
+
'seed':self.seed
|
|
503
510
|
}
|
|
504
511
|
# Hyperparameters for Decision Forest model
|
|
505
512
|
df_params = {
|
|
@@ -510,7 +517,7 @@ class _ModelTraining:
|
|
|
510
517
|
'max_depth': tuple(max_depth),
|
|
511
518
|
'min_node_size': tuple(min_node_size),
|
|
512
519
|
'num_trees': tuple(num_trees),
|
|
513
|
-
'seed':
|
|
520
|
+
'seed':self.seed
|
|
514
521
|
}
|
|
515
522
|
|
|
516
523
|
# Updating model type in case of classification
|
|
@@ -874,16 +881,30 @@ class _ModelTraining:
|
|
|
874
881
|
verbose = 0
|
|
875
882
|
|
|
876
883
|
# Hyperparameter tuning
|
|
884
|
+
# Parallel run opens multiple connections for parallel execution,
|
|
885
|
+
# but volatile tables are not accessible across different sessions.
|
|
886
|
+
# Therefore, execution is performed sequentially by setting run_parallel=False.
|
|
887
|
+
|
|
888
|
+
run_parallel = configure.temp_object_type != TeradataConstants.TERADATA_VOLATILE_TABLE
|
|
889
|
+
|
|
890
|
+
common_params = {
|
|
891
|
+
"data": train_data,
|
|
892
|
+
"evaluation_metric": self.stopping_metric,
|
|
893
|
+
"early_stop": self.stopping_tolerance,
|
|
894
|
+
"run_parallel": run_parallel,
|
|
895
|
+
"sample_seed": self.seed,
|
|
896
|
+
"sample_id_column": "id",
|
|
897
|
+
"discard_invalid_column_params": True,
|
|
898
|
+
"stratify_column": self.startify_col,
|
|
899
|
+
"verbose": verbose,
|
|
900
|
+
"max_time": self.max_runtime_secs,
|
|
901
|
+
"suppress_refer_msg": True
|
|
902
|
+
}
|
|
903
|
+
|
|
877
904
|
if model_param['name'] == 'knn':
|
|
878
|
-
_obj.fit(
|
|
879
|
-
early_stop=self.stopping_tolerance, run_parallel=True,
|
|
880
|
-
sample_seed=42, sample_id_column='id', discard_invalid_column_params=True,
|
|
881
|
-
stratify_column=self.startify_col,verbose=verbose, max_time=self.max_runtime_secs)
|
|
905
|
+
_obj.fit(**common_params)
|
|
882
906
|
else:
|
|
883
|
-
_obj.fit(
|
|
884
|
-
early_stop=self.stopping_tolerance, **eval_params,
|
|
885
|
-
run_parallel=True, discard_invalid_column_params=True, sample_seed=42,
|
|
886
|
-
sample_id_column='id',stratify_column=self.startify_col, verbose=verbose, max_time=self.max_runtime_secs)
|
|
907
|
+
_obj.fit(**common_params, **eval_params)
|
|
887
908
|
|
|
888
909
|
# Getting all passed models
|
|
889
910
|
model_info = _obj.model_stats.merge(_obj.models[_obj.models['STATUS']=='PASS'][['MODEL_ID', 'DATA_ID', 'PARAMETERS']],
|
|
@@ -992,4 +1013,4 @@ class _ModelTraining:
|
|
|
992
1013
|
elif ml_name == 'glm':
|
|
993
1014
|
eval_params['family'] = 'GAUSSIAN'
|
|
994
1015
|
|
|
995
|
-
return eval_params
|
|
1016
|
+
return eval_params
|
teradataml/common/__init__.py
CHANGED
|
@@ -1,2 +1 @@
|
|
|
1
|
-
from teradataml.common.formula import as_categorical
|
|
2
|
-
from teradataml.common.constants import Action, Permission
|
|
1
|
+
from teradataml.common.formula import as_categorical
|
teradataml/common/constants.py
CHANGED
|
@@ -14,7 +14,6 @@ A class for holding all constants
|
|
|
14
14
|
import re
|
|
15
15
|
import sqlalchemy
|
|
16
16
|
from enum import Enum
|
|
17
|
-
from teradataml.options.configure import configure
|
|
18
17
|
from teradatasqlalchemy.types import (INTEGER, SMALLINT, BIGINT, BYTEINT, DECIMAL, FLOAT, NUMBER, VARCHAR)
|
|
19
18
|
from teradatasqlalchemy.types import (DATE, TIME, TIMESTAMP)
|
|
20
19
|
from teradatasqlalchemy.types import (BYTE, VARBYTE, BLOB)
|
|
@@ -62,6 +61,8 @@ class SQLConstants(Enum):
|
|
|
62
61
|
SQL_DELETE_SPECIFIC_ROW = 30
|
|
63
62
|
SQL_EXEC_STORED_PROCEDURE = 31
|
|
64
63
|
SQL_SELECT_COLUMNNAMES_WITH_WHERE = 32
|
|
64
|
+
SQL_HELP_DATABASE = 33
|
|
65
|
+
SQL_HELP_DATALAKE = 34
|
|
65
66
|
CONSTRAINT = ["check_constraint", "primary_key_constraint",
|
|
66
67
|
"foreign_key_constraint", "unique_key_constraint"]
|
|
67
68
|
|
|
@@ -485,11 +486,18 @@ class TableOperatorConstants(Enum):
|
|
|
485
486
|
|
|
486
487
|
# Check if Python interpreter and add-ons are installed or not.
|
|
487
488
|
# Location of In-DB packages is indicated by configure.indb_install_location.
|
|
489
|
+
# Check for both python and pip versions.
|
|
488
490
|
CHECK_PYTHON_INSTALLED = """SELECT distinct * FROM SCRIPT(
|
|
489
491
|
ON (select 1) PARTITION BY ANY
|
|
490
|
-
SCRIPT_COMMAND('{}/bin/pip3 --version')
|
|
491
|
-
returns('
|
|
492
|
+
SCRIPT_COMMAND('echo $({0}/bin/pip3 --version) -- $({0}/bin/python3 --version)')
|
|
493
|
+
returns('pip VARCHAR(256)'))
|
|
492
494
|
"""
|
|
495
|
+
# Check which version of rpms are installed.
|
|
496
|
+
INDB_PYTHON_PATH = """SEL DISTINCT os_ver
|
|
497
|
+
FROM SCRIPT(
|
|
498
|
+
SCRIPT_COMMAND('grep CPE_NAME /etc/os-release')
|
|
499
|
+
RETURNS('os_ver VARCHAR(100)')
|
|
500
|
+
);"""
|
|
493
501
|
|
|
494
502
|
# Script Query to get Python packages and corresponding versions.
|
|
495
503
|
# Location of In-DB packages is indicated by configure.indb_install_location.
|
|
@@ -802,7 +810,8 @@ class ValibConstants(Enum):
|
|
|
802
810
|
"subdivision_method": "subdivisionmethod",
|
|
803
811
|
"subdivision_threshold": "subdivisionthreshold",
|
|
804
812
|
"filter": "where",
|
|
805
|
-
"gen_sql_only": "gensqlonly"
|
|
813
|
+
"gen_sql_only": "gensqlonly",
|
|
814
|
+
"charset": "charset"
|
|
806
815
|
},
|
|
807
816
|
|
|
808
817
|
"DATAEXPLORER": {
|
|
@@ -819,7 +828,8 @@ class ValibConstants(Enum):
|
|
|
819
828
|
"stats_options": "statsoptions",
|
|
820
829
|
"distinct": "uniques",
|
|
821
830
|
"filter": "where",
|
|
822
|
-
"gen_sql": "gensql"
|
|
831
|
+
"gen_sql": "gensql",
|
|
832
|
+
"charset": "charset"
|
|
823
833
|
},
|
|
824
834
|
|
|
825
835
|
"FREQUENCY": {
|
|
@@ -833,7 +843,8 @@ class ValibConstants(Enum):
|
|
|
833
843
|
"style": "style",
|
|
834
844
|
"top_n": "topvalues",
|
|
835
845
|
"filter": "where",
|
|
836
|
-
"gen_sql_only": "gensqlonly"
|
|
846
|
+
"gen_sql_only": "gensqlonly",
|
|
847
|
+
"charset": "charset"
|
|
837
848
|
},
|
|
838
849
|
|
|
839
850
|
"HISTOGRAM": {
|
|
@@ -848,7 +859,8 @@ class ValibConstants(Enum):
|
|
|
848
859
|
"stats_columns": "statisticscolumns",
|
|
849
860
|
"hist_style": "style",
|
|
850
861
|
"filter": "where",
|
|
851
|
-
"gen_sql_only": "gensqlonly"
|
|
862
|
+
"gen_sql_only": "gensqlonly",
|
|
863
|
+
"charset": "charset"
|
|
852
864
|
},
|
|
853
865
|
|
|
854
866
|
"STATISTICS": {
|
|
@@ -859,7 +871,8 @@ class ValibConstants(Enum):
|
|
|
859
871
|
"statistical_method": "statisticalmethod",
|
|
860
872
|
"stats_options": "statsoptions",
|
|
861
873
|
"filter": "where",
|
|
862
|
-
"gen_sql_only": "gensqlonly"
|
|
874
|
+
"gen_sql_only": "gensqlonly",
|
|
875
|
+
"charset": "charset"
|
|
863
876
|
},
|
|
864
877
|
|
|
865
878
|
"TEXTFIELDANALYZER": {
|
|
@@ -867,7 +880,8 @@ class ValibConstants(Enum):
|
|
|
867
880
|
"exclude_columns": "columnstoexclude",
|
|
868
881
|
"analyze_numerics": "extendednumericanalysis",
|
|
869
882
|
"analyze_unicode": "extendedunicodeanalysis",
|
|
870
|
-
"gen_sql_only": "gensqlonly"
|
|
883
|
+
"gen_sql_only": "gensqlonly",
|
|
884
|
+
"charset": "charset"
|
|
871
885
|
},
|
|
872
886
|
|
|
873
887
|
"VALUES": {
|
|
@@ -876,7 +890,8 @@ class ValibConstants(Enum):
|
|
|
876
890
|
"group_columns": "groupby",
|
|
877
891
|
"distinct": "uniques",
|
|
878
892
|
"filter": "where",
|
|
879
|
-
"gen_sql_only": "gensqlonly"
|
|
893
|
+
"gen_sql_only": "gensqlonly",
|
|
894
|
+
"charset": "charset"
|
|
880
895
|
},
|
|
881
896
|
|
|
882
897
|
"ASSOCIATION": {
|
|
@@ -901,7 +916,8 @@ class ValibConstants(Enum):
|
|
|
901
916
|
"filter": "where",
|
|
902
917
|
"no_support_results": "dropsupporttables",
|
|
903
918
|
"support_result_prefix": "resulttableprefix",
|
|
904
|
-
"gen_sql_only": "gensqlonly"
|
|
919
|
+
"gen_sql_only": "gensqlonly",
|
|
920
|
+
"charset": "charset"
|
|
905
921
|
},
|
|
906
922
|
|
|
907
923
|
"KMEANS": {
|
|
@@ -911,7 +927,8 @@ class ValibConstants(Enum):
|
|
|
911
927
|
"continuation": "continuation",
|
|
912
928
|
"max_iter": "iterations",
|
|
913
929
|
"operator_database": "operatordatabase",
|
|
914
|
-
"threshold": "threshold"
|
|
930
|
+
"threshold": "threshold",
|
|
931
|
+
"charset": "charset"
|
|
915
932
|
},
|
|
916
933
|
|
|
917
934
|
"KMEANSSCORE": {
|
|
@@ -919,7 +936,8 @@ class ValibConstants(Enum):
|
|
|
919
936
|
"cluster_column": "clustername",
|
|
920
937
|
"fallback": "fallback",
|
|
921
938
|
"operator_database": "operatordatabase",
|
|
922
|
-
"accumulate": "retain"
|
|
939
|
+
"accumulate": "retain",
|
|
940
|
+
"charset": "charset"
|
|
923
941
|
},
|
|
924
942
|
|
|
925
943
|
"DECISIONTREE": {
|
|
@@ -931,7 +949,8 @@ class ValibConstants(Enum):
|
|
|
931
949
|
"max_depth": "max_depth",
|
|
932
950
|
"num_splits": "min_records",
|
|
933
951
|
"operator_database": "operatordatabase",
|
|
934
|
-
"pruning": "pruning"
|
|
952
|
+
"pruning": "pruning",
|
|
953
|
+
"charset": "charset"
|
|
935
954
|
},
|
|
936
955
|
|
|
937
956
|
"DECISIONTREESCORE": {
|
|
@@ -941,7 +960,8 @@ class ValibConstants(Enum):
|
|
|
941
960
|
"profile": "profiletables",
|
|
942
961
|
"accumulate": "retain",
|
|
943
962
|
"targeted_value": "targetedvalue",
|
|
944
|
-
"gen_sql_only": "gensqlonly"
|
|
963
|
+
"gen_sql_only": "gensqlonly",
|
|
964
|
+
"charset": "charset"
|
|
945
965
|
},
|
|
946
966
|
|
|
947
967
|
"MATRIX": {
|
|
@@ -951,7 +971,8 @@ class ValibConstants(Enum):
|
|
|
951
971
|
"matrix_output": "matrixoutput",
|
|
952
972
|
"type": "matrixtype",
|
|
953
973
|
"handle_nulls": "nullhandling",
|
|
954
|
-
"filter": "where"
|
|
974
|
+
"filter": "where",
|
|
975
|
+
"charset": "charset"
|
|
955
976
|
},
|
|
956
977
|
|
|
957
978
|
"LINEAR": {
|
|
@@ -973,7 +994,8 @@ class ValibConstants(Enum):
|
|
|
973
994
|
"stepwise": "stepwise",
|
|
974
995
|
"use_fstat": "usefstat",
|
|
975
996
|
"use_pvalue": "usepvalue",
|
|
976
|
-
"variance_prop_threshold": "varianceproportionthreshold"
|
|
997
|
+
"variance_prop_threshold": "varianceproportionthreshold",
|
|
998
|
+
"charset": "charset"
|
|
977
999
|
},
|
|
978
1000
|
|
|
979
1001
|
"LINEARSCORE": {
|
|
@@ -981,7 +1003,8 @@ class ValibConstants(Enum):
|
|
|
981
1003
|
"response_column": "predicted",
|
|
982
1004
|
"residual_column": "residual",
|
|
983
1005
|
"accumulate": "retain",
|
|
984
|
-
"gen_sql_only": "gensqlonly"
|
|
1006
|
+
"gen_sql_only": "gensqlonly",
|
|
1007
|
+
"charset": "charset"
|
|
985
1008
|
},
|
|
986
1009
|
|
|
987
1010
|
"LOGISTIC": {
|
|
@@ -1011,7 +1034,8 @@ class ValibConstants(Enum):
|
|
|
1011
1034
|
"end_threshold": "thresholdend",
|
|
1012
1035
|
"increment_threshold": "thresholdincrement",
|
|
1013
1036
|
"threshold_output": "thresholdtable",
|
|
1014
|
-
"variance_prop_threshold": "varianceproportionthreshold"
|
|
1037
|
+
"variance_prop_threshold": "varianceproportionthreshold",
|
|
1038
|
+
"charset": "charset"
|
|
1015
1039
|
},
|
|
1016
1040
|
|
|
1017
1041
|
"LOGISTICSCORE": {
|
|
@@ -1023,7 +1047,8 @@ class ValibConstants(Enum):
|
|
|
1023
1047
|
"start_threshold": "thresholdbegin",
|
|
1024
1048
|
"end_threshold": "thresholdend",
|
|
1025
1049
|
"increment_threshold": "thresholdincrement",
|
|
1026
|
-
"gen_sql_only": "gensqlonly"
|
|
1050
|
+
"gen_sql_only": "gensqlonly",
|
|
1051
|
+
"charset": "charset"
|
|
1027
1052
|
|
|
1028
1053
|
# The following three arguments should not be present for LogRegPredict function
|
|
1029
1054
|
# whereas when the function is LogRegEvaluator, at least one of these should be
|
|
@@ -1051,13 +1076,15 @@ class ValibConstants(Enum):
|
|
|
1051
1076
|
"rotation_type": "rotationtype",
|
|
1052
1077
|
"load_threshold": "thresholdloading",
|
|
1053
1078
|
"percent_threshold": "thresholdpercent",
|
|
1054
|
-
"variance_prop_threshold": "varianceproportionthreshold"
|
|
1079
|
+
"variance_prop_threshold": "varianceproportionthreshold",
|
|
1080
|
+
"charset": "charset"
|
|
1055
1081
|
},
|
|
1056
1082
|
|
|
1057
1083
|
"FACTORSCORE": {
|
|
1058
1084
|
"index_columns": "index",
|
|
1059
1085
|
"accumulate": "retain",
|
|
1060
|
-
"gen_sql_only": "gensqlonly"
|
|
1086
|
+
"gen_sql_only": "gensqlonly",
|
|
1087
|
+
"charset": "charset"
|
|
1061
1088
|
},
|
|
1062
1089
|
|
|
1063
1090
|
"PARAMETRICTEST": {
|
|
@@ -1076,7 +1103,8 @@ class ValibConstants(Enum):
|
|
|
1076
1103
|
"style": "teststyle",
|
|
1077
1104
|
"probability_threshold": "thresholdprobability",
|
|
1078
1105
|
"with_indicator": "withindicator",
|
|
1079
|
-
"gen_sql_only": "gensqlonly"
|
|
1106
|
+
"gen_sql_only": "gensqlonly",
|
|
1107
|
+
"charset": "charset"
|
|
1080
1108
|
},
|
|
1081
1109
|
|
|
1082
1110
|
"BINOMIALTEST": {
|
|
@@ -1091,7 +1119,8 @@ class ValibConstants(Enum):
|
|
|
1091
1119
|
"stats_database": "statsdatabase",
|
|
1092
1120
|
"style": "teststyle",
|
|
1093
1121
|
"probability_threshold": "thresholdprobability",
|
|
1094
|
-
"gen_sql_only": "gensqlonly"
|
|
1122
|
+
"gen_sql_only": "gensqlonly",
|
|
1123
|
+
"charset": "charset"
|
|
1095
1124
|
},
|
|
1096
1125
|
|
|
1097
1126
|
"KSTEST": {
|
|
@@ -1103,7 +1132,8 @@ class ValibConstants(Enum):
|
|
|
1103
1132
|
"stats_database": "statsdatabase",
|
|
1104
1133
|
"style": "teststyle",
|
|
1105
1134
|
"probability_threshold": "thresholdprobability",
|
|
1106
|
-
"gen_sql_only": "gensqlonly"
|
|
1135
|
+
"gen_sql_only": "gensqlonly",
|
|
1136
|
+
"charset": "charset"
|
|
1107
1137
|
},
|
|
1108
1138
|
|
|
1109
1139
|
"CHISQUARETEST": {
|
|
@@ -1117,7 +1147,8 @@ class ValibConstants(Enum):
|
|
|
1117
1147
|
"stats_database": "statsdatabase",
|
|
1118
1148
|
"style": "teststyle",
|
|
1119
1149
|
"probability_threshold": "thresholdprobability",
|
|
1120
|
-
"gen_sql_only": "gensqlonly"
|
|
1150
|
+
"gen_sql_only": "gensqlonly",
|
|
1151
|
+
"charset": "charset"
|
|
1121
1152
|
},
|
|
1122
1153
|
|
|
1123
1154
|
"RANKTEST": {
|
|
@@ -1136,7 +1167,8 @@ class ValibConstants(Enum):
|
|
|
1136
1167
|
"style": "teststyle",
|
|
1137
1168
|
"probability_threshold": "thresholdprobability",
|
|
1138
1169
|
"treatment_column": "treatmentcolumn",
|
|
1139
|
-
"gen_sql_only": "gensqlonly"
|
|
1170
|
+
"gen_sql_only": "gensqlonly",
|
|
1171
|
+
"charset": "charset"
|
|
1140
1172
|
},
|
|
1141
1173
|
|
|
1142
1174
|
"VARTRAN": {
|
|
@@ -1147,13 +1179,15 @@ class ValibConstants(Enum):
|
|
|
1147
1179
|
"allow_duplicates": "multiset",
|
|
1148
1180
|
"nopi": "noindex",
|
|
1149
1181
|
"filter": "whereclause",
|
|
1150
|
-
"gen_sql_only": "gensqlonly"
|
|
1182
|
+
"gen_sql_only": "gensqlonly",
|
|
1183
|
+
"charset": "charset"
|
|
1151
1184
|
},
|
|
1152
1185
|
|
|
1153
1186
|
"REPORT": {
|
|
1154
1187
|
"analysis_type": "analysistype",
|
|
1155
1188
|
"filter": "where",
|
|
1156
|
-
"gen_sql_only": "gensqlonly"
|
|
1189
|
+
"gen_sql_only": "gensqlonly",
|
|
1190
|
+
"charset": "charset"
|
|
1157
1191
|
}
|
|
1158
1192
|
}
|
|
1159
1193
|
|
|
@@ -1448,6 +1482,7 @@ class HTTPRequest(Enum):
|
|
|
1448
1482
|
POST = "post"
|
|
1449
1483
|
PUT = "put"
|
|
1450
1484
|
DELETE = "delete"
|
|
1485
|
+
PATCH = "patch"
|
|
1451
1486
|
|
|
1452
1487
|
|
|
1453
1488
|
class AsyncStatusColumns(Enum):
|
|
@@ -1497,14 +1532,3 @@ class SessionParamsPythonNames:
|
|
|
1497
1532
|
DATABASE = "Current DataBase"
|
|
1498
1533
|
DATEFORM = 'Current DateForm'
|
|
1499
1534
|
|
|
1500
|
-
|
|
1501
|
-
class Action(Enum):
|
|
1502
|
-
# Holds variable names for the type of grant to be provided.
|
|
1503
|
-
GRANT = "GRANT"
|
|
1504
|
-
REVOKE = "REVOKE"
|
|
1505
|
-
|
|
1506
|
-
class Permission(Enum):
|
|
1507
|
-
# Holds variable names for the type of permission to be provided.
|
|
1508
|
-
READ = "READ"
|
|
1509
|
-
WRITE = "WRITE"
|
|
1510
|
-
|
|
@@ -164,7 +164,7 @@ class ErrorInfoCodes(Enum):
|
|
|
164
164
|
AED_SETOP_INPUT_TABLE_COLUMNS_COUNT_MISMATCH = 'TDML_2111'
|
|
165
165
|
AED_SHOW_QUERY_MULTIPLE_OPTIONS = 'TDML_2112'
|
|
166
166
|
|
|
167
|
-
# Table Operator Error Codes starting from 2300 - Reserved till
|
|
167
|
+
# Table Operator Error Codes starting from 2300 - Reserved till 2314
|
|
168
168
|
INPUT_FILE_NOT_FOUND = 'TDML_2300'
|
|
169
169
|
REMOVE_FILE_FAILED = 'TDML_2301'
|
|
170
170
|
INSTALL_FILE_FAILED = 'TDML_2302'
|
|
@@ -175,6 +175,8 @@ class ErrorInfoCodes(Enum):
|
|
|
175
175
|
NOT_ALLOWED_VALUES = 'TDML_2307'
|
|
176
176
|
ARGUMENT_VALUE_SAME = 'TDML_2308'
|
|
177
177
|
PYTHON_NOT_INSTALLED = 'TDML_2309'
|
|
178
|
+
PYTHON_VERSION_MISMATCH = 'TDML_2310'
|
|
179
|
+
PYTHON_VERSION_MISMATCH_OAF = 'TDML_2416'
|
|
178
180
|
EMPTY_FILE = 'TDML_2311'
|
|
179
181
|
ARG_NONE = 'TDML_2312'
|
|
180
182
|
EITHER_FUNCTION_OR_ARGS = 'TDML_2313'
|
|
@@ -223,7 +225,7 @@ class ErrorInfoCodes(Enum):
|
|
|
223
225
|
TARGET_COL_NOT_FOUND_FOR_EVALUATE = 'TDML_2541'
|
|
224
226
|
|
|
225
227
|
# OpenAF Error codes starting from 2551 - Reserved till 2560.
|
|
226
|
-
|
|
228
|
+
SET_REQUIRED_PARAMS = 'TDML_2551'
|
|
227
229
|
|
|
228
230
|
class MessageCodes(Enum):
|
|
229
231
|
"""
|
|
@@ -396,6 +398,10 @@ class MessageCodes(Enum):
|
|
|
396
398
|
|
|
397
399
|
PYTHON_NOT_INSTALLED = "Python is not installed on Vantage. " \
|
|
398
400
|
"Please install Python interpreter and add-on packages on Vantage."
|
|
401
|
+
PYTHON_VERSION_MISMATCH = "Python version on Vantage is not same as that of local environment. " \
|
|
402
|
+
"Use the same Python version '{}' or '{}.x' in local environment."
|
|
403
|
+
PYTHON_VERSION_MISMATCH_OAF = "Python version of Lake user environment '{}' is not same as that of local environment '{}'. " \
|
|
404
|
+
"Maintain similar version of Python between Lake user environment and local environment."
|
|
399
405
|
IMPORT_PYTHON_PACKAGE = "Module '{}' not found. Install '{}' before running {}()."
|
|
400
406
|
INT_ARGUMENT_COMPARISON = "Argument '{}' must be {} to argument '{}'"
|
|
401
407
|
EXECUTION_FAILED = "Failed to {}. {}"
|
|
@@ -430,4 +436,8 @@ class MessageCodes(Enum):
|
|
|
430
436
|
PATH_NOT_FOUND = "Specified local path '{}' not found. Please check the path."
|
|
431
437
|
TARGET_COL_NOT_FOUND_FOR_EVALUATE = "Target column '{}' not found in the passed dataFrame. "\
|
|
432
438
|
"evaluate() requires target column to be present in the dataFrame."
|
|
433
|
-
|
|
439
|
+
SET_REQUIRED_PARAMS = "{} is required to run '{}'. Set it using {}()."
|
|
440
|
+
CONNECTION_PARAMS = "Required connection parameters are missing. Connection parameters should either be " \
|
|
441
|
+
"explicitly passed to function or specified using a configuration file, or setting up " \
|
|
442
|
+
"the environment variables."
|
|
443
|
+
|