teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +196 -2
- teradataml/__init__.py +4 -0
- teradataml/_version.py +1 -1
- teradataml/analytics/analytic_function_executor.py +79 -4
- teradataml/analytics/json_parser/metadata.py +12 -3
- teradataml/analytics/json_parser/utils.py +7 -2
- teradataml/analytics/sqle/__init__.py +1 -0
- teradataml/analytics/table_operator/__init__.py +1 -1
- teradataml/analytics/uaf/__init__.py +1 -1
- teradataml/analytics/utils.py +4 -0
- teradataml/automl/data_preparation.py +3 -2
- teradataml/automl/feature_engineering.py +15 -7
- teradataml/automl/model_training.py +39 -33
- teradataml/common/__init__.py +2 -1
- teradataml/common/constants.py +35 -0
- teradataml/common/garbagecollector.py +2 -1
- teradataml/common/messagecodes.py +8 -2
- teradataml/common/messages.py +3 -1
- teradataml/common/sqlbundle.py +25 -3
- teradataml/common/utils.py +134 -9
- teradataml/context/context.py +20 -10
- teradataml/data/SQL_Fundamentals.pdf +0 -0
- teradataml/data/dataframe_example.json +18 -2
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/Shap.py +7 -1
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
- teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
- teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
- teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
- teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
- teradataml/data/medical_readings.csv +101 -0
- teradataml/data/patient_profile.csv +101 -0
- teradataml/data/scripts/lightgbm/dataset.template +157 -0
- teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
- teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
- teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
- teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
- teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
- teradataml/data/target_udt_data.csv +8 -0
- teradataml/data/templates/open_source_ml.json +3 -2
- teradataml/data/vectordistance_example.json +4 -0
- teradataml/dataframe/dataframe.py +543 -175
- teradataml/dataframe/functions.py +553 -25
- teradataml/dataframe/sql.py +184 -15
- teradataml/dbutils/dbutils.py +556 -18
- teradataml/dbutils/filemgr.py +48 -1
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/__init__.py +1 -1
- teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
- teradataml/opensource/_lightgbm.py +950 -0
- teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
- teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
- teradataml/opensource/sklearn/__init__.py +0 -1
- teradataml/opensource/sklearn/_sklearn_wrapper.py +798 -438
- teradataml/options/__init__.py +7 -23
- teradataml/options/configure.py +29 -3
- teradataml/scriptmgmt/UserEnv.py +3 -3
- teradataml/scriptmgmt/lls_utils.py +74 -21
- teradataml/store/__init__.py +13 -0
- teradataml/store/feature_store/__init__.py +0 -0
- teradataml/store/feature_store/constants.py +291 -0
- teradataml/store/feature_store/feature_store.py +2223 -0
- teradataml/store/feature_store/models.py +1505 -0
- teradataml/store/vector_store/__init__.py +1586 -0
- teradataml/table_operators/query_generator.py +3 -0
- teradataml/table_operators/table_operator_query_generator.py +3 -1
- teradataml/table_operators/table_operator_util.py +37 -38
- teradataml/table_operators/templates/dataframe_register.template +69 -0
- teradataml/utils/dtypes.py +4 -2
- teradataml/utils/validators.py +33 -1
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +200 -5
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +88 -65
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
|
@@ -28,6 +28,7 @@ from teradataml.dataframe.dataframe import DataFrame
|
|
|
28
28
|
from teradataml import execute_sql, get_connection
|
|
29
29
|
from teradataml import SVM, GLM, DecisionForest, XGBoost, GridSearch, KNN, RandomSearch
|
|
30
30
|
from teradataml.utils.validators import _Validators
|
|
31
|
+
from teradataml.common.utils import UtilFuncs
|
|
31
32
|
|
|
32
33
|
|
|
33
34
|
class _ModelTraining:
|
|
@@ -796,7 +797,8 @@ class _ModelTraining:
|
|
|
796
797
|
trained_models = []
|
|
797
798
|
for param in model_params:
|
|
798
799
|
result = self._hyperparameter_tunning(param, trainng_datas)
|
|
799
|
-
|
|
800
|
+
if result is not None:
|
|
801
|
+
trained_models.append(result)
|
|
800
802
|
|
|
801
803
|
models_df = pd.concat(trained_models, ignore_index=True)
|
|
802
804
|
return models_df
|
|
@@ -886,39 +888,43 @@ class _ModelTraining:
|
|
|
886
888
|
# Getting all passed models
|
|
887
889
|
model_info = _obj.model_stats.merge(_obj.models[_obj.models['STATUS']=='PASS'][['MODEL_ID', 'DATA_ID', 'PARAMETERS']],
|
|
888
890
|
on='MODEL_ID', how='inner')
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
|
|
895
|
-
|
|
896
|
-
|
|
897
|
-
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
model_info.insert(1, 'FEATURE_SELECTION', model_info.pop('FEATURE_SELECTION'))
|
|
905
|
-
|
|
906
|
-
if not self.is_classification_type():
|
|
907
|
-
# Calculating Adjusted-R2 for regression
|
|
908
|
-
# Getting size and feature count for each feature selection method
|
|
909
|
-
methods = ["lasso", "rfe", "pca"]
|
|
910
|
-
size_map = {method : df.select('id').size for method, df in zip(methods, train_data)}
|
|
911
|
-
feature_count_map = {method : len(df.columns) - 2 for method, df in zip(methods, train_data)}
|
|
912
|
-
model_info['ADJUSTED_R2'] = model_info.apply(lambda row:
|
|
913
|
-
1 - ((1 - row['R2']) * (size_map[row['FEATURE_SELECTION']] - 1) /
|
|
914
|
-
(size_map[row['FEATURE_SELECTION']] - feature_count_map[row['FEATURE_SELECTION']] - 1)), axis=1)
|
|
915
|
-
|
|
916
|
-
self._display_msg(msg="-"*100,
|
|
917
|
-
progress_bar=self.progress_bar,
|
|
918
|
-
show_data=True)
|
|
919
|
-
self.progress_bar.update()
|
|
891
|
+
if not model_info.empty:
|
|
892
|
+
# Creating mapping data ID to feature selection method
|
|
893
|
+
data_id_to_table_map = {"DF_0": ('lasso', train_data[0]._table_name),
|
|
894
|
+
"DF_1": ('rfe', train_data[1]._table_name),
|
|
895
|
+
"DF_2": ('pca', train_data[2]._table_name)}
|
|
896
|
+
|
|
897
|
+
# Updating model stats with feature selection method and result table
|
|
898
|
+
for index, row in model_info.iterrows():
|
|
899
|
+
model_info.loc[index, 'FEATURE_SELECTION'] = data_id_to_table_map[row['DATA_ID']][0]
|
|
900
|
+
model_info.loc[index, 'DATA_TABLE'] = data_id_to_table_map[row['DATA_ID']][1]
|
|
901
|
+
model_info.loc[index, 'RESULT_TABLE'] = _obj.get_model(row['MODEL_ID']).result._table_name
|
|
902
|
+
model_info.loc[index, 'model-obj'] = _obj.get_model(row['MODEL_ID'])
|
|
903
|
+
|
|
904
|
+
# Dropping column 'DATA_ID'
|
|
905
|
+
model_info.drop(['DATA_ID'], axis=1, inplace=True)
|
|
920
906
|
|
|
921
|
-
|
|
907
|
+
model_info.insert(1, 'FEATURE_SELECTION', model_info.pop('FEATURE_SELECTION'))
|
|
908
|
+
|
|
909
|
+
if not self.is_classification_type():
|
|
910
|
+
# Calculating Adjusted-R2 for regression
|
|
911
|
+
# Getting size and feature count for each feature selection method
|
|
912
|
+
methods = ["lasso", "rfe", "pca"]
|
|
913
|
+
size_map = {method : df.select('id').size for method, df in zip(methods, train_data)}
|
|
914
|
+
feature_count_map = {method : len(df.columns) - 2 for method, df in zip(methods, train_data)}
|
|
915
|
+
model_info['ADJUSTED_R2'] = model_info.apply(lambda row:
|
|
916
|
+
1 - ((1 - row['R2']) * (size_map[row['FEATURE_SELECTION']] - 1) /
|
|
917
|
+
(size_map[row['FEATURE_SELECTION']] - feature_count_map[row['FEATURE_SELECTION']] - 1)), axis=1)
|
|
918
|
+
|
|
919
|
+
self._display_msg(msg="-"*100,
|
|
920
|
+
progress_bar=self.progress_bar,
|
|
921
|
+
show_data=True)
|
|
922
|
+
self.progress_bar.update()
|
|
923
|
+
|
|
924
|
+
return model_info
|
|
925
|
+
|
|
926
|
+
# Returning None, if no model is passed
|
|
927
|
+
return None
|
|
922
928
|
|
|
923
929
|
@staticmethod
|
|
924
930
|
def _eval_params_generation(ml_name,
|
teradataml/common/__init__.py
CHANGED
|
@@ -1 +1,2 @@
|
|
|
1
|
-
from teradataml.common.formula import as_categorical
|
|
1
|
+
from teradataml.common.formula import as_categorical
|
|
2
|
+
from teradataml.common.constants import Action, Permission
|
teradataml/common/constants.py
CHANGED
|
@@ -18,6 +18,14 @@ from teradataml.options.configure import configure
|
|
|
18
18
|
from teradatasqlalchemy.types import (INTEGER, SMALLINT, BIGINT, BYTEINT, DECIMAL, FLOAT, NUMBER, VARCHAR)
|
|
19
19
|
from teradatasqlalchemy.types import (DATE, TIME, TIMESTAMP)
|
|
20
20
|
from teradatasqlalchemy.types import (BYTE, VARBYTE, BLOB)
|
|
21
|
+
from teradatasqlalchemy import (CHAR, CLOB)
|
|
22
|
+
from teradatasqlalchemy import (PERIOD_DATE, PERIOD_TIME, PERIOD_TIMESTAMP)
|
|
23
|
+
from teradatasqlalchemy import (INTERVAL_YEAR, INTERVAL_YEAR_TO_MONTH, INTERVAL_MONTH,
|
|
24
|
+
INTERVAL_DAY,INTERVAL_DAY_TO_HOUR, INTERVAL_DAY_TO_MINUTE,
|
|
25
|
+
INTERVAL_DAY_TO_SECOND, INTERVAL_HOUR,
|
|
26
|
+
INTERVAL_HOUR_TO_MINUTE, INTERVAL_HOUR_TO_SECOND,
|
|
27
|
+
INTERVAL_MINUTE, INTERVAL_MINUTE_TO_SECOND,
|
|
28
|
+
INTERVAL_SECOND)
|
|
21
29
|
from teradatasqlalchemy import (GEOMETRY, MBR, MBB)
|
|
22
30
|
|
|
23
31
|
|
|
@@ -53,6 +61,7 @@ class SQLConstants(Enum):
|
|
|
53
61
|
SQL_DELETE_ALL_ROWS = 29
|
|
54
62
|
SQL_DELETE_SPECIFIC_ROW = 30
|
|
55
63
|
SQL_EXEC_STORED_PROCEDURE = 31
|
|
64
|
+
SQL_SELECT_COLUMNNAMES_WITH_WHERE = 32
|
|
56
65
|
CONSTRAINT = ["check_constraint", "primary_key_constraint",
|
|
57
66
|
"foreign_key_constraint", "unique_key_constraint"]
|
|
58
67
|
|
|
@@ -123,6 +132,14 @@ class TeradataTypes(Enum):
|
|
|
123
132
|
TD_DATE_TYPES = [DATE, sqlalchemy.sql.sqltypes.Date]
|
|
124
133
|
TD_DATE_CODES = ["DA"]
|
|
125
134
|
TD_NULL_TYPE = "NULLTYPE"
|
|
135
|
+
TD_ALL_TYPES = (BYTEINT, SMALLINT, INTEGER, BIGINT, DECIMAL, FLOAT, NUMBER,
|
|
136
|
+
TIMESTAMP, DATE, TIME, CHAR, VARCHAR, CLOB, BYTE, VARBYTE,
|
|
137
|
+
BLOB, PERIOD_DATE, PERIOD_TIME, PERIOD_TIMESTAMP,
|
|
138
|
+
INTERVAL_YEAR, INTERVAL_YEAR_TO_MONTH, INTERVAL_MONTH,
|
|
139
|
+
INTERVAL_DAY, INTERVAL_DAY_TO_HOUR, INTERVAL_DAY_TO_MINUTE,
|
|
140
|
+
INTERVAL_DAY_TO_SECOND, INTERVAL_HOUR,
|
|
141
|
+
INTERVAL_HOUR_TO_MINUTE, INTERVAL_HOUR_TO_SECOND,
|
|
142
|
+
INTERVAL_MINUTE, INTERVAL_MINUTE_TO_SECOND, INTERVAL_SECOND)
|
|
126
143
|
|
|
127
144
|
|
|
128
145
|
class TeradataTableKindConstants(Enum):
|
|
@@ -427,6 +444,8 @@ class TableOperatorConstants(Enum):
|
|
|
427
444
|
APPLY_TEMPLATE = "dataframe_apply.template"
|
|
428
445
|
# Template of the intermediate script that will be generated for UDF.
|
|
429
446
|
UDF_TEMPLATE = "dataframe_udf.template"
|
|
447
|
+
# Template of the intermediate script that will be generated for register.
|
|
448
|
+
REGISTER_TEMPLATE = "dataframe_register.template"
|
|
430
449
|
# In-DB execution mode.
|
|
431
450
|
INDB_EXEC = "IN-DB"
|
|
432
451
|
# Local execution mode.
|
|
@@ -443,6 +462,8 @@ class TableOperatorConstants(Enum):
|
|
|
443
462
|
APPLY_OP = "apply"
|
|
444
463
|
# udf operation.
|
|
445
464
|
UDF_OP = "udf"
|
|
465
|
+
# register operation.
|
|
466
|
+
REGISTER_OP = "register"
|
|
446
467
|
# Template of the script_executor that will be used to generate the temporary script_executor file.
|
|
447
468
|
SCRIPT_TEMPLATE = "script_executor.template"
|
|
448
469
|
# Log Type.
|
|
@@ -480,6 +501,9 @@ class TableOperatorConstants(Enum):
|
|
|
480
501
|
"delimiter(' ') " \
|
|
481
502
|
"returns('package VARCHAR({2}), " \
|
|
482
503
|
"version VARCHAR({2})'))"
|
|
504
|
+
|
|
505
|
+
SCRIPT_LIST_FILES_QUERY = "SELECT DISTINCT * FROM SCRIPT (SCRIPT_COMMAND " \
|
|
506
|
+
"('ls ./{}') RETURNS ('Files VARCHAR({})'))"
|
|
483
507
|
|
|
484
508
|
class ValibConstants(Enum):
|
|
485
509
|
# A dictionary that maps teradataml name of the exposed VALIB function name
|
|
@@ -1473,3 +1497,14 @@ class SessionParamsPythonNames:
|
|
|
1473
1497
|
DATABASE = "Current DataBase"
|
|
1474
1498
|
DATEFORM = 'Current DateForm'
|
|
1475
1499
|
|
|
1500
|
+
|
|
1501
|
+
class Action(Enum):
|
|
1502
|
+
# Holds variable names for the type of grant to be provided.
|
|
1503
|
+
GRANT = "GRANT"
|
|
1504
|
+
REVOKE = "REVOKE"
|
|
1505
|
+
|
|
1506
|
+
class Permission(Enum):
|
|
1507
|
+
# Holds variable names for the type of permission to be provided.
|
|
1508
|
+
READ = "READ"
|
|
1509
|
+
WRITE = "WRITE"
|
|
1510
|
+
|
|
@@ -520,7 +520,8 @@ class GarbageCollector():
|
|
|
520
520
|
fileparts = file.split(GarbageCollector.__filenameseperator)
|
|
521
521
|
hostname = fileparts[1]
|
|
522
522
|
filepid = int(fileparts[2])
|
|
523
|
-
|
|
523
|
+
# Check for both host ip and hostname in case user passed hostname for creating connection.
|
|
524
|
+
if hostname == tdmlctx.context._get_host_ip() or hostname == tdmlctx.context._get_host():
|
|
524
525
|
if filepid == os.getpid() or not psutil.pid_exists(filepid):
|
|
525
526
|
tempfiles.append(filepath)
|
|
526
527
|
except (IndexError, ValueError):
|
|
@@ -86,6 +86,7 @@ class ErrorInfoCodes(Enum):
|
|
|
86
86
|
LIST_DB_TABLES_FAILED = 'TDML_2053'
|
|
87
87
|
INVALID_CONTEXT_CONNECTION = 'TDML_2054'
|
|
88
88
|
TDMLDF_REQUIRED_TABLE_ALIAS = "TDML_2055"
|
|
89
|
+
TDMLDF_ALIAS_REQUIRED = TDMLDF_REQUIRED_TABLE_ALIAS
|
|
89
90
|
TDMLDF_COLUMN_ALREADY_EXISTS = "TDML_2056"
|
|
90
91
|
TDMLDF_AGGREGATE_INVALID_COLUMN = 'TDML_2057'
|
|
91
92
|
TDMLDF_AGGREGATE_COMBINED_ERR = 'TDML_2058'
|
|
@@ -221,6 +222,9 @@ class ErrorInfoCodes(Enum):
|
|
|
221
222
|
INVALID_PARTITIONING_COLS = 'TDML_2540'
|
|
222
223
|
TARGET_COL_NOT_FOUND_FOR_EVALUATE = 'TDML_2541'
|
|
223
224
|
|
|
225
|
+
# OpenAF Error codes starting from 2551 - Reserved till 2560.
|
|
226
|
+
AUTH_TOKEN_REQUIRED = 'TDML_2551'
|
|
227
|
+
|
|
224
228
|
class MessageCodes(Enum):
|
|
225
229
|
"""
|
|
226
230
|
MessageCodes contains all the messages that are displayed to the user which are informational
|
|
@@ -274,6 +278,7 @@ class MessageCodes(Enum):
|
|
|
274
278
|
"and df.select([['col1', 'col2', 'col3']])."
|
|
275
279
|
TDMLDF_INVALID_TABLE_ALIAS = "{} should not be equal."
|
|
276
280
|
TDMLDF_REQUIRED_TABLE_ALIAS = "All arguments lsuffix, rsuffix, lprefix and rprefix should not be None as TeradataML DataFrames contains common column(s)."
|
|
281
|
+
TDMLDF_ALIAS_REQUIRED = "Use aliased DataFrames for self {}."
|
|
277
282
|
TDMLDF_COLUMN_ALREADY_EXISTS = "Column name with alias '{}' already exists in {} TeradataML DataFrame, change '{}'"
|
|
278
283
|
TDMLDF_INVALID_JOIN_CONDITION = "Invalid 'on' condition(s): '{}', check documentation for valid conditions."
|
|
279
284
|
TDMLDF_UNEQUAL_NUMBER_OF_COLUMNS = "Number of columns in '{}' and '{}' should be equal."
|
|
@@ -419,9 +424,10 @@ class MessageCodes(Enum):
|
|
|
419
424
|
"they should be present in '{}' DataFrame."
|
|
420
425
|
PARTITIONING_COLS_IN_FEATURE_COLS = "Columns in '{}' argument should not be part of"\
|
|
421
426
|
" feature columns."
|
|
422
|
-
PARTITION_VALUES_NOT_MATCHING = "Values in
|
|
427
|
+
PARTITION_VALUES_NOT_MATCHING = "Values in {} and {} data partition columns should be same."
|
|
423
428
|
PARTITION_IN_BOTH_FIT_AND_PREDICT = "Use \"partition_columns\" only if model is fitted with partition_column(s)."
|
|
424
429
|
INVALID_PARTITIONING_COLS = "Provided partition_column(s) '{}' is/are not present in parent of '{}' DataFrame(s)."
|
|
425
430
|
PATH_NOT_FOUND = "Specified local path '{}' not found. Please check the path."
|
|
426
431
|
TARGET_COL_NOT_FOUND_FOR_EVALUATE = "Target column '{}' not found in the passed dataFrame. "\
|
|
427
|
-
"evaluate() requires target column to be present in the dataFrame."
|
|
432
|
+
"evaluate() requires target column to be present in the dataFrame."
|
|
433
|
+
AUTH_TOKEN_REQUIRED = "Authentication token is required to run '{}'. Set the token using set_auth_token()."
|
teradataml/common/messages.py
CHANGED
|
@@ -88,6 +88,7 @@ class Messages():
|
|
|
88
88
|
[ErrorInfoCodes.TDMLDF_INVALID_JOIN_CONDITION, MessageCodes.TDMLDF_INVALID_JOIN_CONDITION],
|
|
89
89
|
[ErrorInfoCodes.TDMLDF_INVALID_TABLE_ALIAS, MessageCodes.TDMLDF_INVALID_TABLE_ALIAS],
|
|
90
90
|
[ErrorInfoCodes.TDMLDF_REQUIRED_TABLE_ALIAS, MessageCodes.TDMLDF_REQUIRED_TABLE_ALIAS],
|
|
91
|
+
[ErrorInfoCodes.TDMLDF_ALIAS_REQUIRED, MessageCodes.TDMLDF_ALIAS_REQUIRED],
|
|
91
92
|
[ErrorInfoCodes.TDMLDF_COLUMN_ALREADY_EXISTS, MessageCodes.TDMLDF_COLUMN_ALREADY_EXISTS],
|
|
92
93
|
[ErrorInfoCodes.INVALID_LENGTH_ARGS, MessageCodes.INVALID_LENGTH_ARGS],
|
|
93
94
|
[ErrorInfoCodes.TDMLDF_AGGREGATE_UNSUPPORTED, MessageCodes.TDMLDF_AGGREGATE_UNSUPPORTED],
|
|
@@ -190,7 +191,8 @@ class Messages():
|
|
|
190
191
|
[ErrorInfoCodes.PARTITION_IN_BOTH_FIT_AND_PREDICT, MessageCodes.PARTITION_IN_BOTH_FIT_AND_PREDICT],
|
|
191
192
|
[ErrorInfoCodes.INVALID_PARTITIONING_COLS, MessageCodes.INVALID_PARTITIONING_COLS],
|
|
192
193
|
[ErrorInfoCodes.PATH_NOT_FOUND, MessageCodes.PATH_NOT_FOUND],
|
|
193
|
-
[ErrorInfoCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE, MessageCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE]
|
|
194
|
+
[ErrorInfoCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE, MessageCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE],
|
|
195
|
+
[ErrorInfoCodes.AUTH_TOKEN_REQUIRED, MessageCodes.AUTH_TOKEN_REQUIRED],
|
|
194
196
|
]
|
|
195
197
|
|
|
196
198
|
@staticmethod
|
teradataml/common/sqlbundle.py
CHANGED
|
@@ -67,7 +67,8 @@ class SQLBundle:
|
|
|
67
67
|
[SQLConstants.SQL_DELETE_ALL_ROWS, "DELETE FROM {0}"],
|
|
68
68
|
[SQLConstants.SQL_DELETE_SPECIFIC_ROW, "DELETE FROM {0} WHERE {1}"],
|
|
69
69
|
[SQLConstants.SQL_CREATE_TABLE_USING_COLUMNS, "CREATE MULTISET TABLE {0}( {1} )"],
|
|
70
|
-
[SQLConstants.SQL_EXEC_STORED_PROCEDURE, "call {0}"]
|
|
70
|
+
[SQLConstants.SQL_EXEC_STORED_PROCEDURE, "call {0}"],
|
|
71
|
+
[SQLConstants.SQL_SELECT_COLUMNNAMES_WITH_WHERE, "sel {0} from {1} where {2}"],
|
|
71
72
|
|
|
72
73
|
]
|
|
73
74
|
self._add_sql_version()
|
|
@@ -446,11 +447,32 @@ class SQLBundle:
|
|
|
446
447
|
query = sqlbundle._get_sql_query(SQLConstants.SQL_SELECT_DATABASENAME).format(schema_name)
|
|
447
448
|
if table_name:
|
|
448
449
|
if '%' in table_name:
|
|
449
|
-
|
|
450
|
+
# Check if '%' is present in the middle of the table name, excluding the first and last character,
|
|
451
|
+
# as '%' can be present at the start or end of table name
|
|
452
|
+
# and replace single quotes with empty string
|
|
453
|
+
# Checking for following cases with % in between table name:
|
|
454
|
+
# eg: table_name = 'ab%c', '%a%bc', '%ab%c%' or 'ab%c%'
|
|
455
|
+
|
|
456
|
+
# table_name[1:-1] - Removing single quotes from table name
|
|
457
|
+
# table_name[1:-1][1:-1] - Removing first and last character from table_name[1:-1]
|
|
458
|
+
if '%' in table_name[1:-1][1:-1]:
|
|
459
|
+
query = "{0}{1}".format(query, sqlbundle._get_sql_query(SQLConstants.SQL_AND_TABLE_NAME_LIKE).format(table_name))
|
|
460
|
+
else:
|
|
461
|
+
# Checking for following cases with % at the start or end of table name:
|
|
462
|
+
# eg: table_name = '%abc', 'abc%', '%abc%'
|
|
463
|
+
# Extracting table name without '%' character
|
|
464
|
+
table_name_str = table_name.replace('%', '')
|
|
465
|
+
|
|
466
|
+
# Adding condition to check if table name contains the string using POSITION function
|
|
467
|
+
# POSITION function returns the position index of the substring in the string if found,
|
|
468
|
+
# else returns 0
|
|
469
|
+
query = "{0}{1}{2}".format(query, sqlbundle._get_sql_query(SQLConstants.SQL_AND_TABLE_NAME_LIKE).format(table_name), \
|
|
470
|
+
" AND POSITION({0} IN TABLENAME) > 0".format(table_name_str))
|
|
450
471
|
else:
|
|
451
|
-
query = "{0}{1}".format(query, sqlbundle._get_sql_query(SQLConstants.
|
|
472
|
+
query = "{0}{1}".format(query, sqlbundle._get_sql_query(SQLConstants.SQL_AND_TABLE_NAME_LIKE).format(table_name))
|
|
452
473
|
if table_kind:
|
|
453
474
|
query = '{0}{1}'.format(query, sqlbundle._get_sql_query(SQLConstants.SQL_AND_TABLE_KIND).format(table_kind))
|
|
475
|
+
|
|
454
476
|
return query
|
|
455
477
|
|
|
456
478
|
# TODO :: Following SQLConstants needs to be implemented as and when needed.
|
teradataml/common/utils.py
CHANGED
|
@@ -13,6 +13,7 @@ by other classes which can be reused according to the need.
|
|
|
13
13
|
Add all the common functions in this class like creating temporary table names, getting
|
|
14
14
|
the datatypes etc.
|
|
15
15
|
"""
|
|
16
|
+
from inspect import getsource
|
|
16
17
|
import json
|
|
17
18
|
import uuid
|
|
18
19
|
from math import floor
|
|
@@ -43,6 +44,7 @@ from teradataml.options.configure import configure
|
|
|
43
44
|
from teradataml.options.display import display
|
|
44
45
|
from teradataml.common.constants import TeradataReservedKeywords, TeradataConstants
|
|
45
46
|
|
|
47
|
+
from teradataml.utils.internal_buffer import _InternalBuffer
|
|
46
48
|
from teradatasqlalchemy.types import _TDType
|
|
47
49
|
from teradatasqlalchemy.types import (INTEGER, SMALLINT, BIGINT, BYTEINT,
|
|
48
50
|
DECIMAL, FLOAT, NUMBER)
|
|
@@ -1249,6 +1251,41 @@ class UtilFuncs():
|
|
|
1249
1251
|
return UtilFuncs._teradata_quote_arg(keyword, "\"", False)
|
|
1250
1252
|
|
|
1251
1253
|
return keyword
|
|
1254
|
+
|
|
1255
|
+
def _contains_space(item):
|
|
1256
|
+
"""
|
|
1257
|
+
Check if the specified string in item has spaces or tabs in it.
|
|
1258
|
+
|
|
1259
|
+
PARAMETERS:
|
|
1260
|
+
item:
|
|
1261
|
+
Required Argument.
|
|
1262
|
+
Specifies a string to check for spaces or tabs.
|
|
1263
|
+
Types: str
|
|
1264
|
+
|
|
1265
|
+
RETURNS:
|
|
1266
|
+
True, if the specified string has spaces or tabs in it, else False.
|
|
1267
|
+
|
|
1268
|
+
RAISES:
|
|
1269
|
+
None.
|
|
1270
|
+
|
|
1271
|
+
EXAMPLES:
|
|
1272
|
+
# Passing column name with spaces returns True.
|
|
1273
|
+
is_space = UtilFuncs._contains_space("col name")
|
|
1274
|
+
print(is_space)
|
|
1275
|
+
# Passing column name without spaces returns False.
|
|
1276
|
+
is_space = UtilFuncs._contains_space("colname")
|
|
1277
|
+
print(is_space)
|
|
1278
|
+
"""
|
|
1279
|
+
# Check if the input is a string and look for spaces or tabs
|
|
1280
|
+
if isinstance(item, str):
|
|
1281
|
+
return any(char in {' ', '\t'} for char in item)
|
|
1282
|
+
|
|
1283
|
+
# If the input is a list, check each element
|
|
1284
|
+
if isinstance(item, list):
|
|
1285
|
+
# Check each item in the list
|
|
1286
|
+
return any(UtilFuncs._contains_space(col) for col in item)
|
|
1287
|
+
|
|
1288
|
+
return False
|
|
1252
1289
|
|
|
1253
1290
|
@staticmethod
|
|
1254
1291
|
def _in_schema(schema_name, table_name):
|
|
@@ -2358,8 +2395,16 @@ class UtilFuncs():
|
|
|
2358
2395
|
>>> self._is_lake()
|
|
2359
2396
|
"""
|
|
2360
2397
|
|
|
2361
|
-
|
|
2362
|
-
|
|
2398
|
+
tbl_operator = configure.table_operator.lower() \
|
|
2399
|
+
if configure.table_operator else None
|
|
2400
|
+
|
|
2401
|
+
# If the user does not provide a table_operator, check the database version
|
|
2402
|
+
# and determine the system type accordingly.
|
|
2403
|
+
if tbl_operator is None:
|
|
2404
|
+
from teradataml.context.context import _get_database_version
|
|
2405
|
+
return int(_get_database_version().split(".")[0]) >= 20
|
|
2406
|
+
|
|
2407
|
+
return tbl_operator == "apply"
|
|
2363
2408
|
|
|
2364
2409
|
@staticmethod
|
|
2365
2410
|
def _get_python_execution_path():
|
|
@@ -2379,8 +2424,8 @@ class UtilFuncs():
|
|
|
2379
2424
|
EXAMPLES:
|
|
2380
2425
|
>>> self._get_python_execution_path()
|
|
2381
2426
|
"""
|
|
2382
|
-
# 'indb_install_location' expects python installation directory path.
|
|
2383
|
-
# Hence, postfixing python binary path.
|
|
2427
|
+
# 'indb_install_location' expects python installation directory path.
|
|
2428
|
+
# Hence, postfixing python binary path.
|
|
2384
2429
|
return "python" if UtilFuncs._is_lake() else \
|
|
2385
2430
|
'{}/bin/python3'.format(configure.indb_install_location)
|
|
2386
2431
|
|
|
@@ -2403,7 +2448,7 @@ class UtilFuncs():
|
|
|
2403
2448
|
"""
|
|
2404
2449
|
db_name = UtilFuncs._teradata_unquote_arg(UtilFuncs._extract_db_name(tablename), "\"")
|
|
2405
2450
|
table_view_name = UtilFuncs._teradata_unquote_arg(UtilFuncs._extract_table_name(tablename), "\"")
|
|
2406
|
-
query = SQLBundle._build_select_table_kind(db_name, "{0}".format(table_view_name), "'V'")
|
|
2451
|
+
query = SQLBundle._build_select_table_kind(db_name, "'{0}'".format(table_view_name), "'V'")
|
|
2407
2452
|
|
|
2408
2453
|
df = UtilFuncs._execute_query(query)
|
|
2409
2454
|
if len(df) > 0:
|
|
@@ -2471,7 +2516,7 @@ class UtilFuncs():
|
|
|
2471
2516
|
except Exception as exc:
|
|
2472
2517
|
raise exc
|
|
2473
2518
|
|
|
2474
|
-
def _get_env_name(col):
|
|
2519
|
+
def _get_env_name(col=None):
|
|
2475
2520
|
"""
|
|
2476
2521
|
DESCRIPTION:
|
|
2477
2522
|
Internal function to get the env name if passed with ColumnExpression
|
|
@@ -2479,9 +2524,10 @@ class UtilFuncs():
|
|
|
2479
2524
|
|
|
2480
2525
|
PARAMETERS:
|
|
2481
2526
|
col:
|
|
2482
|
-
|
|
2527
|
+
Optional Argument.
|
|
2483
2528
|
Specifies teradataml DataFrame ColumnExpression.
|
|
2484
2529
|
Types: teradataml DataFrame ColumnExpression
|
|
2530
|
+
Default Value: None
|
|
2485
2531
|
|
|
2486
2532
|
RAISES:
|
|
2487
2533
|
None.
|
|
@@ -2493,10 +2539,10 @@ class UtilFuncs():
|
|
|
2493
2539
|
>>> self._get_env_name(col)
|
|
2494
2540
|
"""
|
|
2495
2541
|
|
|
2496
|
-
# If env_name is passed with
|
|
2542
|
+
# If ColumnExpression is passed and env_name is passed with it fetch the env name,
|
|
2497
2543
|
# else check if default "openml_user_env" env is configured or not,
|
|
2498
2544
|
# else get the default "openml_env" env if exists or create new default env.
|
|
2499
|
-
if col._env_name is not None:
|
|
2545
|
+
if col and col._env_name is not None:
|
|
2500
2546
|
from teradataml.scriptmgmt.UserEnv import UserEnv
|
|
2501
2547
|
env = col._env_name
|
|
2502
2548
|
env_name = env.env_name if isinstance(col._env_name, UserEnv) else env
|
|
@@ -2506,5 +2552,84 @@ class UtilFuncs():
|
|
|
2506
2552
|
env_name = UtilFuncs._create_or_get_env("open_source_ml.json").env_name
|
|
2507
2553
|
return env_name
|
|
2508
2554
|
|
|
2555
|
+
def _func_to_string(user_functions):
|
|
2556
|
+
"""
|
|
2557
|
+
DESCRIPTION:
|
|
2558
|
+
Internal function to get the user functions in a single string format.
|
|
2559
|
+
|
|
2560
|
+
PARAMETERS:
|
|
2561
|
+
user_functions:
|
|
2562
|
+
Required Argument.
|
|
2563
|
+
List of user functions.
|
|
2564
|
+
Types: list
|
|
2565
|
+
|
|
2566
|
+
RAISES:
|
|
2567
|
+
None.
|
|
2568
|
+
|
|
2569
|
+
RETURNS:
|
|
2570
|
+
string
|
|
2571
|
+
|
|
2572
|
+
EXAMPLES:
|
|
2573
|
+
>>> from teradataml.dataframe.functions import udf
|
|
2574
|
+
>>> @udf(returns=VARCHAR())
|
|
2575
|
+
... def sum(x, y):
|
|
2576
|
+
... return x+y
|
|
2577
|
+
>>>
|
|
2578
|
+
>>> def to_upper(s):
|
|
2579
|
+
... return s.upper()
|
|
2580
|
+
>>> user_functions = [sum(1,2)._udf, to_upper]
|
|
2581
|
+
>>> res = self._func_to_string(user_functions)
|
|
2582
|
+
>>> print(res)
|
|
2583
|
+
def sum(x, y):
|
|
2584
|
+
return x+y
|
|
2585
|
+
|
|
2586
|
+
def to_upper(s):
|
|
2587
|
+
return s.upper()
|
|
2588
|
+
|
|
2589
|
+
>>>
|
|
2590
|
+
"""
|
|
2591
|
+
user_function_code = ""
|
|
2592
|
+
for func in user_functions:
|
|
2593
|
+
# Get the source code of the user function.
|
|
2594
|
+
func = getsource(func)
|
|
2595
|
+
# If the function has any extra space at the beginning, remove it.
|
|
2596
|
+
func = func.lstrip()
|
|
2597
|
+
# Function can have a decorator, e.g. udf as decorator; remove it.
|
|
2598
|
+
if func.startswith("@"):
|
|
2599
|
+
func = func[func.find("\n")+1: ].lstrip()
|
|
2600
|
+
# If multiple functions are passed, separate them with new line.
|
|
2601
|
+
user_function_code += func + '\n'
|
|
2602
|
+
return user_function_code
|
|
2603
|
+
|
|
2604
|
+
@staticmethod
|
|
2605
|
+
def _get_qualified_table_name(schema_name, table_name):
|
|
2606
|
+
"""
|
|
2607
|
+
DESCRIPTION:
|
|
2608
|
+
Internal function to get the fully qualified name of table.
|
|
2609
|
+
|
|
2610
|
+
PARAMETERS:
|
|
2611
|
+
schema_name:
|
|
2612
|
+
Required Argument.
|
|
2613
|
+
Specifies the name of the schema.
|
|
2614
|
+
Types: str
|
|
2615
|
+
|
|
2616
|
+
table_name:
|
|
2617
|
+
Required Argument.
|
|
2618
|
+
Specifies the name of the table.
|
|
2619
|
+
Types: str
|
|
2620
|
+
|
|
2621
|
+
RAISES:
|
|
2622
|
+
None.
|
|
2623
|
+
|
|
2624
|
+
RETURNS:
|
|
2625
|
+
string
|
|
2626
|
+
|
|
2627
|
+
EXAMPLES:
|
|
2628
|
+
>>> UtilFuncs._get_qualified_table_name("schema_name", "table_name")
|
|
2629
|
+
'"schema_name"."table_name"'
|
|
2630
|
+
"""
|
|
2631
|
+
return '"{}"."{}"'.format(schema_name, table_name)
|
|
2632
|
+
|
|
2633
|
+
|
|
2509
2634
|
from teradataml.common.aed_utils import AedUtils
|
|
2510
2635
|
from teradataml.dbutils.filemgr import remove_file
|
teradataml/context/context.py
CHANGED
|
@@ -1010,19 +1010,21 @@ def _get_host():
|
|
|
1010
1010
|
EXAMPLES:
|
|
1011
1011
|
_get_host()
|
|
1012
1012
|
"""
|
|
1013
|
-
|
|
1014
|
-
|
|
1013
|
+
if td_connection is None:
|
|
1014
|
+
return None
|
|
1015
|
+
else:
|
|
1016
|
+
return td_sqlalchemy_engine.url.host
|
|
1015
1017
|
|
|
1016
1018
|
def _get_host_ip():
|
|
1017
1019
|
"""
|
|
1018
1020
|
DESCRIPTION:
|
|
1019
|
-
Function to return the host IP address.
|
|
1021
|
+
Function to return the host IP address or host name associated with the current context.
|
|
1020
1022
|
|
|
1021
1023
|
PARAMETERS:
|
|
1022
1024
|
None.
|
|
1023
1025
|
|
|
1024
1026
|
RETURNS:
|
|
1025
|
-
Host IP address.
|
|
1027
|
+
Host IP address or host name associated with the current context.
|
|
1026
1028
|
|
|
1027
1029
|
RAISES:
|
|
1028
1030
|
None.
|
|
@@ -1034,21 +1036,29 @@ def _get_host_ip():
|
|
|
1034
1036
|
if td_connection is None:
|
|
1035
1037
|
return None
|
|
1036
1038
|
|
|
1037
|
-
|
|
1039
|
+
host = _get_host()
|
|
1038
1040
|
try:
|
|
1039
1041
|
# Validate if host_ip is a valid IP address (IPv4 or IPv6)
|
|
1040
|
-
ipaddress.ip_address(
|
|
1042
|
+
ipaddress.ip_address(host)
|
|
1043
|
+
return host
|
|
1041
1044
|
except ValueError:
|
|
1042
1045
|
# If host is not an IP address, get the IP address by DNS name from _InternalBuffer.
|
|
1043
|
-
|
|
1044
|
-
if
|
|
1045
|
-
|
|
1046
|
+
dns_host_ip = _InternalBuffer.get('dns_host_ip')
|
|
1047
|
+
if dns_host_ip:
|
|
1048
|
+
return dns_host_ip
|
|
1049
|
+
|
|
1050
|
+
# If DNS host ip not found, resolve the host name to get the IP address.
|
|
1051
|
+
# If there is an issue resolving the host name, it will proceed with the DNS host as it is.
|
|
1052
|
+
try:
|
|
1046
1053
|
# Get the list of addresses(compatible for both IPv4 and IPv6)
|
|
1047
|
-
addr_info = socket.getaddrinfo(
|
|
1054
|
+
addr_info = socket.getaddrinfo(host, None)
|
|
1048
1055
|
# Pick the first address from the list
|
|
1049
1056
|
host_ip = addr_info[0][4][0]
|
|
1050
1057
|
# Add the DNS host IP to the _InternalBuffer.
|
|
1051
1058
|
_InternalBuffer.add(dns_host_ip=host_ip)
|
|
1059
|
+
except socket.gaierror:
|
|
1060
|
+
# Use dns host as it is
|
|
1061
|
+
host_ip = host
|
|
1052
1062
|
return host_ip
|
|
1053
1063
|
|
|
1054
1064
|
class ContextUtilFuncs():
|
|
Binary file
|
|
@@ -152,6 +152,22 @@
|
|
|
152
152
|
"item" : "varchar(20)",
|
|
153
153
|
"sku" : "integer",
|
|
154
154
|
"category" : "varchar(20)"
|
|
155
|
-
|
|
156
|
-
|
|
155
|
+
},
|
|
156
|
+
"medical_readings": {
|
|
157
|
+
"patient_id": "BIGINT",
|
|
158
|
+
"record_timestamp": "timestamp",
|
|
159
|
+
"glucose": "BIGINT",
|
|
160
|
+
"blood_pressure": "BIGINT",
|
|
161
|
+
"insulin": "BIGINT",
|
|
162
|
+
"diabetes_pedigree_function": "FLOAT",
|
|
163
|
+
"outcome": "BIGINT"
|
|
164
|
+
},
|
|
165
|
+
"patient_profile": {
|
|
166
|
+
"patient_id": "BIGINT",
|
|
167
|
+
"record_timestamp": "timestamp",
|
|
168
|
+
"pregnancies": "BIGINT",
|
|
169
|
+
"age": "BIGINT",
|
|
170
|
+
"bmi": "FLOAT",
|
|
171
|
+
"skin_thickness": "FLOAT"
|
|
172
|
+
}
|
|
157
173
|
}
|
|
@@ -131,7 +131,7 @@ def NaiveBayes(data = None, response_column = None, numeric_inputs = None,
|
|
|
131
131
|
display_analytic_functions()
|
|
132
132
|
|
|
133
133
|
# Import function NaiveBayes.
|
|
134
|
-
from teradataml import NaiveBayes
|
|
134
|
+
from teradataml import NaiveBayes, Unpivoting
|
|
135
135
|
|
|
136
136
|
# Example 1: NaiveBayes function to generate classification model using Dense input.
|
|
137
137
|
NaiveBayes_out = NaiveBayes(data=housing_train, response_column='homestyle',
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
def Shap(data = None, object = None, training_function = "TD_GLM",
|
|
1
|
+
def Shap(data = None, object = None, id_column=None, training_function = "TD_GLM",
|
|
2
2
|
model_type = "Regression", input_columns = None, detailed = False,
|
|
3
3
|
accumulate = None, num_parallel_trees = 1000, num_boost_rounds = 10,
|
|
4
4
|
**generic_arguments):
|
|
@@ -19,6 +19,12 @@ def Shap(data = None, object = None, training_function = "TD_GLM",
|
|
|
19
19
|
Required Argument.
|
|
20
20
|
Specifies the teradataml DataFrame containing the model data.
|
|
21
21
|
Types: teradataml DataFrame
|
|
22
|
+
|
|
23
|
+
id_column:
|
|
24
|
+
Required Argument.
|
|
25
|
+
Specifies the input data column name that has the unique identifier
|
|
26
|
+
for each row in the "data".
|
|
27
|
+
Types: str
|
|
22
28
|
|
|
23
29
|
training_function:
|
|
24
30
|
Required Argument.
|
|
@@ -133,10 +133,10 @@ def TDNaiveBayesPredict(data = None, object = None, id_column = None,
|
|
|
133
133
|
# Check the list of available analytic functions.
|
|
134
134
|
display_analytic_functions()
|
|
135
135
|
|
|
136
|
-
# Import function
|
|
137
|
-
from teradataml import
|
|
136
|
+
# Import function TDNaiveBayesPredict.
|
|
137
|
+
from teradataml import TDNaiveBayesPredict, NaiveBayes, Unpivoting
|
|
138
138
|
|
|
139
|
-
# Example 1:
|
|
139
|
+
# Example 1: TDNaiveBayesPredict function to predict the classification label using Dense input.
|
|
140
140
|
NaiveBayes_out = NaiveBayes(data=housing_train, response_column='homestyle',
|
|
141
141
|
numeric_inputs=['price','lotsize','bedrooms','bathrms','stories','garagepl'],
|
|
142
142
|
categorical_inputs=['driveway','recroom','fullbase','gashw','airco','prefarea'])
|
|
@@ -152,7 +152,7 @@ def TDNaiveBayesPredict(data = None, object = None, id_column = None,
|
|
|
152
152
|
# Print the result DataFrame.
|
|
153
153
|
print( NaiveBayesPredict_out.result)
|
|
154
154
|
|
|
155
|
-
# Example 2:
|
|
155
|
+
# Example 2: TDNaiveBayesPredict function to predict the classification label using Sparse input.
|
|
156
156
|
|
|
157
157
|
# Unpivoting the data for sparse input to naive bayes.
|
|
158
158
|
upvt_train = Unpivoting(data = housing_train, id_column = 'sn',
|