teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.3__py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, as they appear in their public registry, and is provided for informational purposes only.

Note: this release of teradataml has been flagged as potentially problematic.
Files changed (88)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +196 -2
  3. teradataml/__init__.py +4 -0
  4. teradataml/_version.py +1 -1
  5. teradataml/analytics/analytic_function_executor.py +79 -4
  6. teradataml/analytics/json_parser/metadata.py +12 -3
  7. teradataml/analytics/json_parser/utils.py +7 -2
  8. teradataml/analytics/sqle/__init__.py +1 -0
  9. teradataml/analytics/table_operator/__init__.py +1 -1
  10. teradataml/analytics/uaf/__init__.py +1 -1
  11. teradataml/analytics/utils.py +4 -0
  12. teradataml/automl/data_preparation.py +3 -2
  13. teradataml/automl/feature_engineering.py +15 -7
  14. teradataml/automl/model_training.py +39 -33
  15. teradataml/common/__init__.py +2 -1
  16. teradataml/common/constants.py +35 -0
  17. teradataml/common/garbagecollector.py +2 -1
  18. teradataml/common/messagecodes.py +8 -2
  19. teradataml/common/messages.py +3 -1
  20. teradataml/common/sqlbundle.py +25 -3
  21. teradataml/common/utils.py +134 -9
  22. teradataml/context/context.py +20 -10
  23. teradataml/data/SQL_Fundamentals.pdf +0 -0
  24. teradataml/data/dataframe_example.json +18 -2
  25. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
  26. teradataml/data/docs/sqle/docs_17_20/Shap.py +7 -1
  27. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
  28. teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
  29. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  30. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  31. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
  32. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
  33. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  34. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  35. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  36. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  37. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  38. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
  39. teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
  40. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
  41. teradataml/data/medical_readings.csv +101 -0
  42. teradataml/data/patient_profile.csv +101 -0
  43. teradataml/data/scripts/lightgbm/dataset.template +157 -0
  44. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
  45. teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
  46. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
  47. teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
  48. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
  49. teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
  50. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
  51. teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
  52. teradataml/data/target_udt_data.csv +8 -0
  53. teradataml/data/templates/open_source_ml.json +3 -2
  54. teradataml/data/vectordistance_example.json +4 -0
  55. teradataml/dataframe/dataframe.py +543 -175
  56. teradataml/dataframe/functions.py +553 -25
  57. teradataml/dataframe/sql.py +184 -15
  58. teradataml/dbutils/dbutils.py +556 -18
  59. teradataml/dbutils/filemgr.py +48 -1
  60. teradataml/lib/aed_0_1.dll +0 -0
  61. teradataml/opensource/__init__.py +1 -1
  62. teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
  63. teradataml/opensource/_lightgbm.py +950 -0
  64. teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
  65. teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
  66. teradataml/opensource/sklearn/__init__.py +0 -1
  67. teradataml/opensource/sklearn/_sklearn_wrapper.py +798 -438
  68. teradataml/options/__init__.py +7 -23
  69. teradataml/options/configure.py +29 -3
  70. teradataml/scriptmgmt/UserEnv.py +3 -3
  71. teradataml/scriptmgmt/lls_utils.py +74 -21
  72. teradataml/store/__init__.py +13 -0
  73. teradataml/store/feature_store/__init__.py +0 -0
  74. teradataml/store/feature_store/constants.py +291 -0
  75. teradataml/store/feature_store/feature_store.py +2223 -0
  76. teradataml/store/feature_store/models.py +1505 -0
  77. teradataml/store/vector_store/__init__.py +1586 -0
  78. teradataml/table_operators/query_generator.py +3 -0
  79. teradataml/table_operators/table_operator_query_generator.py +3 -1
  80. teradataml/table_operators/table_operator_util.py +37 -38
  81. teradataml/table_operators/templates/dataframe_register.template +69 -0
  82. teradataml/utils/dtypes.py +4 -2
  83. teradataml/utils/validators.py +33 -1
  84. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +200 -5
  85. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +88 -65
  86. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
  87. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
  88. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
teradataml/automl/model_training.py

@@ -28,6 +28,7 @@ from teradataml.dataframe.dataframe import DataFrame
 from teradataml import execute_sql, get_connection
 from teradataml import SVM, GLM, DecisionForest, XGBoost, GridSearch, KNN, RandomSearch
 from teradataml.utils.validators import _Validators
+from teradataml.common.utils import UtilFuncs


 class _ModelTraining:
@@ -796,7 +797,8 @@ class _ModelTraining:
         trained_models = []
         for param in model_params:
             result = self._hyperparameter_tunning(param, trainng_datas)
-            trained_models.append(result)
+            if result is not None:
+                trained_models.append(result)

         models_df = pd.concat(trained_models, ignore_index=True)
         return models_df
@@ -886,39 +888,43 @@ class _ModelTraining:
         # Getting all passed models
         model_info = _obj.model_stats.merge(_obj.models[_obj.models['STATUS']=='PASS'][['MODEL_ID', 'DATA_ID', 'PARAMETERS']],
                                             on='MODEL_ID', how='inner')
-        # Creating mapping data ID to feature selection method
-        data_id_to_table_map = {"DF_0": ('lasso', train_data[0]._table_name),
-                                "DF_1": ('rfe', train_data[1]._table_name),
-                                "DF_2": ('pca', train_data[2]._table_name)}
-
-        # Updating model stats with feature selection method and result table
-        for index, row in model_info.iterrows():
-            model_info.loc[index, 'FEATURE_SELECTION'] = data_id_to_table_map[row['DATA_ID']][0]
-            model_info.loc[index, 'DATA_TABLE'] = data_id_to_table_map[row['DATA_ID']][1]
-            model_info.loc[index, 'RESULT_TABLE'] = _obj.get_model(row['MODEL_ID']).result._table_name
-            model_info.loc[index, 'model-obj'] = _obj.get_model(row['MODEL_ID'])
-
-        # Dropping column 'DATA_ID'
-        model_info.drop(['DATA_ID'], axis=1, inplace=True)
-
-        model_info.insert(1, 'FEATURE_SELECTION', model_info.pop('FEATURE_SELECTION'))
-
-        if not self.is_classification_type():
-            # Calculating Adjusted-R2 for regression
-            # Getting size and feature count for each feature selection method
-            methods = ["lasso", "rfe", "pca"]
-            size_map = {method : df.select('id').size for method, df in zip(methods, train_data)}
-            feature_count_map = {method : len(df.columns) - 2 for method, df in zip(methods, train_data)}
-            model_info['ADJUSTED_R2'] = model_info.apply(lambda row:
-                1 - ((1 - row['R2']) * (size_map[row['FEATURE_SELECTION']] - 1) /
-                (size_map[row['FEATURE_SELECTION']] - feature_count_map[row['FEATURE_SELECTION']] - 1)), axis=1)
-
-        self._display_msg(msg="-"*100,
-                          progress_bar=self.progress_bar,
-                          show_data=True)
-        self.progress_bar.update()
+        if not model_info.empty:
+            # Creating mapping data ID to feature selection method
+            data_id_to_table_map = {"DF_0": ('lasso', train_data[0]._table_name),
+                                    "DF_1": ('rfe', train_data[1]._table_name),
+                                    "DF_2": ('pca', train_data[2]._table_name)}
+
+            # Updating model stats with feature selection method and result table
+            for index, row in model_info.iterrows():
+                model_info.loc[index, 'FEATURE_SELECTION'] = data_id_to_table_map[row['DATA_ID']][0]
+                model_info.loc[index, 'DATA_TABLE'] = data_id_to_table_map[row['DATA_ID']][1]
+                model_info.loc[index, 'RESULT_TABLE'] = _obj.get_model(row['MODEL_ID']).result._table_name
+                model_info.loc[index, 'model-obj'] = _obj.get_model(row['MODEL_ID'])
+
+            # Dropping column 'DATA_ID'
+            model_info.drop(['DATA_ID'], axis=1, inplace=True)

-        return model_info
+            model_info.insert(1, 'FEATURE_SELECTION', model_info.pop('FEATURE_SELECTION'))
+
+            if not self.is_classification_type():
+                # Calculating Adjusted-R2 for regression
+                # Getting size and feature count for each feature selection method
+                methods = ["lasso", "rfe", "pca"]
+                size_map = {method : df.select('id').size for method, df in zip(methods, train_data)}
+                feature_count_map = {method : len(df.columns) - 2 for method, df in zip(methods, train_data)}
+                model_info['ADJUSTED_R2'] = model_info.apply(lambda row:
+                    1 - ((1 - row['R2']) * (size_map[row['FEATURE_SELECTION']] - 1) /
+                    (size_map[row['FEATURE_SELECTION']] - feature_count_map[row['FEATURE_SELECTION']] - 1)), axis=1)
+
+            self._display_msg(msg="-"*100,
+                              progress_bar=self.progress_bar,
+                              show_data=True)
+            self.progress_bar.update()
+
+            return model_info
+
+        # Returning None, if no model is passed
+        return None

     @staticmethod
     def _eval_params_generation(ml_name,
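For reference, the ADJUSTED_R2 column introduced above applies the standard adjustment 1 - (1 - R2) * (n - 1) / (n - p - 1), with n the row count of a method's training DataFrame (size_map) and p its feature count (feature_count_map, i.e. len(df.columns) - 2 after excluding the id and target columns). A minimal standalone sketch of the same arithmetic (function name hypothetical):

def adjusted_r2(r2, n_rows, n_features):
    # Same arithmetic as the lambda above: penalizes R2 as the number of
    # predictors grows relative to the number of rows.
    return 1 - ((1 - r2) * (n_rows - 1) / (n_rows - n_features - 1))

print(adjusted_r2(0.90, 100, 10))  # 0.8887..., slightly below the raw R2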
teradataml/common/__init__.py

@@ -1 +1,2 @@
-from teradataml.common.formula import as_categorical
+from teradataml.common.formula import as_categorical
+from teradataml.common.constants import Action, Permission
teradataml/common/constants.py

@@ -18,6 +18,14 @@ from teradataml.options.configure import configure
 from teradatasqlalchemy.types import (INTEGER, SMALLINT, BIGINT, BYTEINT, DECIMAL, FLOAT, NUMBER, VARCHAR)
 from teradatasqlalchemy.types import (DATE, TIME, TIMESTAMP)
 from teradatasqlalchemy.types import (BYTE, VARBYTE, BLOB)
+from teradatasqlalchemy import (CHAR, CLOB)
+from teradatasqlalchemy import (PERIOD_DATE, PERIOD_TIME, PERIOD_TIMESTAMP)
+from teradatasqlalchemy import (INTERVAL_YEAR, INTERVAL_YEAR_TO_MONTH, INTERVAL_MONTH,
+                                INTERVAL_DAY,INTERVAL_DAY_TO_HOUR, INTERVAL_DAY_TO_MINUTE,
+                                INTERVAL_DAY_TO_SECOND, INTERVAL_HOUR,
+                                INTERVAL_HOUR_TO_MINUTE, INTERVAL_HOUR_TO_SECOND,
+                                INTERVAL_MINUTE, INTERVAL_MINUTE_TO_SECOND,
+                                INTERVAL_SECOND)
 from teradatasqlalchemy import (GEOMETRY, MBR, MBB)


@@ -53,6 +61,7 @@ class SQLConstants(Enum):
     SQL_DELETE_ALL_ROWS = 29
     SQL_DELETE_SPECIFIC_ROW = 30
     SQL_EXEC_STORED_PROCEDURE = 31
+    SQL_SELECT_COLUMNNAMES_WITH_WHERE = 32
     CONSTRAINT = ["check_constraint", "primary_key_constraint",
                   "foreign_key_constraint", "unique_key_constraint"]

@@ -123,6 +132,14 @@ class TeradataTypes(Enum):
     TD_DATE_TYPES = [DATE, sqlalchemy.sql.sqltypes.Date]
     TD_DATE_CODES = ["DA"]
     TD_NULL_TYPE = "NULLTYPE"
+    TD_ALL_TYPES = (BYTEINT, SMALLINT, INTEGER, BIGINT, DECIMAL, FLOAT, NUMBER,
+                    TIMESTAMP, DATE, TIME, CHAR, VARCHAR, CLOB, BYTE, VARBYTE,
+                    BLOB, PERIOD_DATE, PERIOD_TIME, PERIOD_TIMESTAMP,
+                    INTERVAL_YEAR, INTERVAL_YEAR_TO_MONTH, INTERVAL_MONTH,
+                    INTERVAL_DAY, INTERVAL_DAY_TO_HOUR, INTERVAL_DAY_TO_MINUTE,
+                    INTERVAL_DAY_TO_SECOND, INTERVAL_HOUR,
+                    INTERVAL_HOUR_TO_MINUTE, INTERVAL_HOUR_TO_SECOND,
+                    INTERVAL_MINUTE, INTERVAL_MINUTE_TO_SECOND, INTERVAL_SECOND)


 class TeradataTableKindConstants(Enum):
@@ -427,6 +444,8 @@ class TableOperatorConstants(Enum):
     APPLY_TEMPLATE = "dataframe_apply.template"
     # Template of the intermediate script that will be generated for UDF.
     UDF_TEMPLATE = "dataframe_udf.template"
+    # Template of the intermediate script that will be generated for register.
+    REGISTER_TEMPLATE = "dataframe_register.template"
     # In-DB execution mode.
     INDB_EXEC = "IN-DB"
     # Local execution mode.
@@ -443,6 +462,8 @@ class TableOperatorConstants(Enum):
     APPLY_OP = "apply"
     # udf operation.
     UDF_OP = "udf"
+    # register operation.
+    REGISTER_OP = "register"
     # Template of the script_executor that will be used to generate the temporary script_executor file.
     SCRIPT_TEMPLATE = "script_executor.template"
     # Log Type.
@@ -480,6 +501,9 @@ class TableOperatorConstants(Enum):
                         "delimiter(' ') " \
                         "returns('package VARCHAR({2}), " \
                         "version VARCHAR({2})'))"
+
+    SCRIPT_LIST_FILES_QUERY = "SELECT DISTINCT * FROM SCRIPT (SCRIPT_COMMAND " \
+                              "('ls ./{}') RETURNS ('Files VARCHAR({})'))"

 class ValibConstants(Enum):
     # A dictionary that maps teradataml name of the exposed VALIB function name
@@ -1473,3 +1497,14 @@ class SessionParamsPythonNames:
     DATABASE = "Current DataBase"
     DATEFORM = 'Current DateForm'

+
+class Action(Enum):
+    # Holds variable names for the type of grant to be provided.
+    GRANT = "GRANT"
+    REVOKE = "REVOKE"
+
+class Permission(Enum):
+    # Holds variable names for the type of permission to be provided.
+    READ = "READ"
+    WRITE = "WRITE"
+
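Action and Permission are re-exported from teradataml.common.constants in the common/__init__.py hunk above. A minimal sketch of how such flags could be consumed when assembling a GRANT/REVOKE statement; the helper and the READ/WRITE-to-privilege mapping are hypothetical, not teradataml's actual implementation:

from enum import Enum

class Action(Enum):
    GRANT = "GRANT"
    REVOKE = "REVOKE"

class Permission(Enum):
    READ = "READ"
    WRITE = "WRITE"

def privilege_sql(action, permission, table, user):
    # Hypothetical mapping of READ/WRITE onto SQL privileges.
    privs = {"READ": "SELECT", "WRITE": "INSERT, UPDATE, DELETE"}[permission.value]
    keyword = "TO" if action is Action.GRANT else "FROM"
    return "{} {} ON {} {} {}".format(action.value, privs, table, keyword, user)

print(privilege_sql(Action.GRANT, Permission.READ, "sales_db.orders", "alice"))
# GRANT SELECT ON sales_db.orders TO alice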
teradataml/common/garbagecollector.py

@@ -520,7 +520,8 @@ class GarbageCollector():
                 fileparts = file.split(GarbageCollector.__filenameseperator)
                 hostname = fileparts[1]
                 filepid = int(fileparts[2])
-                if hostname == tdmlctx.context._get_host_ip():
+                # Check for both host ip and hostname in case user passed hostname for creating connection.
+                if hostname == tdmlctx.context._get_host_ip() or hostname == tdmlctx.context._get_host():
                     if filepid == os.getpid() or not psutil.pid_exists(filepid):
                         tempfiles.append(filepath)
             except (IndexError, ValueError):
teradataml/common/messagecodes.py

@@ -86,6 +86,7 @@ class ErrorInfoCodes(Enum):
     LIST_DB_TABLES_FAILED = 'TDML_2053'
     INVALID_CONTEXT_CONNECTION = 'TDML_2054'
     TDMLDF_REQUIRED_TABLE_ALIAS = "TDML_2055"
+    TDMLDF_ALIAS_REQUIRED = TDMLDF_REQUIRED_TABLE_ALIAS
     TDMLDF_COLUMN_ALREADY_EXISTS = "TDML_2056"
     TDMLDF_AGGREGATE_INVALID_COLUMN = 'TDML_2057'
     TDMLDF_AGGREGATE_COMBINED_ERR = 'TDML_2058'
@@ -221,6 +222,9 @@ class ErrorInfoCodes(Enum):
     INVALID_PARTITIONING_COLS = 'TDML_2540'
     TARGET_COL_NOT_FOUND_FOR_EVALUATE = 'TDML_2541'

+    # OpenAF Error codes starting from 2551 - Reserved till 2560.
+    AUTH_TOKEN_REQUIRED = 'TDML_2551'
+
 class MessageCodes(Enum):
     """
     MessageCodes contains all the messages that are displayed to the user which are informational
@@ -274,6 +278,7 @@ class MessageCodes(Enum):
                          "and df.select([['col1', 'col2', 'col3']])."
     TDMLDF_INVALID_TABLE_ALIAS = "{} should not be equal."
     TDMLDF_REQUIRED_TABLE_ALIAS = "All arguments lsuffix, rsuffix, lprefix and rprefix should not be None as TeradataML DataFrames contains common column(s)."
+    TDMLDF_ALIAS_REQUIRED = "Use aliased DataFrames for self {}."
     TDMLDF_COLUMN_ALREADY_EXISTS = "Column name with alias '{}' already exists in {} TeradataML DataFrame, change '{}'"
     TDMLDF_INVALID_JOIN_CONDITION = "Invalid 'on' condition(s): '{}', check documentation for valid conditions."
     TDMLDF_UNEQUAL_NUMBER_OF_COLUMNS = "Number of columns in '{}' and '{}' should be equal."
@@ -419,9 +424,10 @@ class MessageCodes(Enum):
                                 "they should be present in '{}' DataFrame."
     PARTITIONING_COLS_IN_FEATURE_COLS = "Columns in '{}' argument should not be part of"\
                                         " feature columns."
-    PARTITION_VALUES_NOT_MATCHING = "Values in training and test data partition columns should be same."
+    PARTITION_VALUES_NOT_MATCHING = "Values in {} and {} data partition columns should be same."
     PARTITION_IN_BOTH_FIT_AND_PREDICT = "Use \"partition_columns\" only if model is fitted with partition_column(s)."
     INVALID_PARTITIONING_COLS = "Provided partition_column(s) '{}' is/are not present in parent of '{}' DataFrame(s)."
     PATH_NOT_FOUND = "Specified local path '{}' not found. Please check the path."
     TARGET_COL_NOT_FOUND_FOR_EVALUATE = "Target column '{}' not found in the passed dataFrame. "\
-                                        "evaluate() requires target column to be present in the dataFrame."
+                                        "evaluate() requires target column to be present in the dataFrame."
+    AUTH_TOKEN_REQUIRED = "Authentication token is required to run '{}'. Set the token using set_auth_token()."
teradataml/common/messages.py

@@ -88,6 +88,7 @@ class Messages():
         [ErrorInfoCodes.TDMLDF_INVALID_JOIN_CONDITION, MessageCodes.TDMLDF_INVALID_JOIN_CONDITION],
         [ErrorInfoCodes.TDMLDF_INVALID_TABLE_ALIAS, MessageCodes.TDMLDF_INVALID_TABLE_ALIAS],
         [ErrorInfoCodes.TDMLDF_REQUIRED_TABLE_ALIAS, MessageCodes.TDMLDF_REQUIRED_TABLE_ALIAS],
+        [ErrorInfoCodes.TDMLDF_ALIAS_REQUIRED, MessageCodes.TDMLDF_ALIAS_REQUIRED],
         [ErrorInfoCodes.TDMLDF_COLUMN_ALREADY_EXISTS, MessageCodes.TDMLDF_COLUMN_ALREADY_EXISTS],
         [ErrorInfoCodes.INVALID_LENGTH_ARGS, MessageCodes.INVALID_LENGTH_ARGS],
         [ErrorInfoCodes.TDMLDF_AGGREGATE_UNSUPPORTED, MessageCodes.TDMLDF_AGGREGATE_UNSUPPORTED],
@@ -190,7 +191,8 @@ class Messages():
         [ErrorInfoCodes.PARTITION_IN_BOTH_FIT_AND_PREDICT, MessageCodes.PARTITION_IN_BOTH_FIT_AND_PREDICT],
         [ErrorInfoCodes.INVALID_PARTITIONING_COLS, MessageCodes.INVALID_PARTITIONING_COLS],
         [ErrorInfoCodes.PATH_NOT_FOUND, MessageCodes.PATH_NOT_FOUND],
-        [ErrorInfoCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE, MessageCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE]
+        [ErrorInfoCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE, MessageCodes.TARGET_COL_NOT_FOUND_FOR_EVALUATE],
+        [ErrorInfoCodes.AUTH_TOKEN_REQUIRED, MessageCodes.AUTH_TOKEN_REQUIRED],
     ]

     @staticmethod
teradataml/common/sqlbundle.py

@@ -67,7 +67,8 @@ class SQLBundle:
             [SQLConstants.SQL_DELETE_ALL_ROWS, "DELETE FROM {0}"],
             [SQLConstants.SQL_DELETE_SPECIFIC_ROW, "DELETE FROM {0} WHERE {1}"],
             [SQLConstants.SQL_CREATE_TABLE_USING_COLUMNS, "CREATE MULTISET TABLE {0}( {1} )"],
-            [SQLConstants.SQL_EXEC_STORED_PROCEDURE, "call {0}"]
+            [SQLConstants.SQL_EXEC_STORED_PROCEDURE, "call {0}"],
+            [SQLConstants.SQL_SELECT_COLUMNNAMES_WITH_WHERE, "sel {0} from {1} where {2}"],

         ]
         self._add_sql_version()
@@ -446,11 +447,32 @@ class SQLBundle:
         query = sqlbundle._get_sql_query(SQLConstants.SQL_SELECT_DATABASENAME).format(schema_name)
         if table_name:
             if '%' in table_name:
-                query = "{0}{1}".format(query, sqlbundle._get_sql_query(SQLConstants.SQL_AND_TABLE_NAME_LIKE).format(table_name))
+                # Check if '%' is present in the between of table name excluding first and last character
+                # as '%' can be present at the start or end of table name
+                # and replace single quotes with empty string
+                # Checking for following cases with % in between table name:
+                # eg: table_name = 'ab%c', '%a%bc', '%ab%c%' or 'ab%c%'
+
+                # table_name[1:-1] - Removing single quotes from table name
+                # table_name[1:-1][1:-1] - Removing first and last character from table_name[1:-1]
+                if '%' in table_name[1:-1][1:-1]:
+                    query = "{0}{1}".format(query, sqlbundle._get_sql_query(SQLConstants.SQL_AND_TABLE_NAME_LIKE).format(table_name))
+                else:
+                    # Checking for following cases with % at the start or end of table name:
+                    # eg: table_name = '%abc', 'abc%', '%abc%'
+                    # Extracting table name without '%' character
+                    table_name_str = table_name.replace('%', '')
+
+                    # Adding condition to check if table name contains the string using POSITION function
+                    # POSITION function returns the position index of the substring in the string if found,
+                    # else returns 0
+                    query = "{0}{1}{2}".format(query, sqlbundle._get_sql_query(SQLConstants.SQL_AND_TABLE_NAME_LIKE).format(table_name), \
+                                               " AND POSITION({0} IN TABLENAME) > 0".format(table_name_str))
             else:
-                query = "{0}{1}".format(query, sqlbundle._get_sql_query(SQLConstants.SQL_AND_TABLE_NAME).format(table_name))
+                query = "{0}{1}".format(query, sqlbundle._get_sql_query(SQLConstants.SQL_AND_TABLE_NAME_LIKE).format(table_name))
         if table_kind:
             query = '{0}{1}'.format(query, sqlbundle._get_sql_query(SQLConstants.SQL_AND_TABLE_KIND).format(table_kind))
+
         return query

     # TODO :: Following SQLConstants needs to be implemented as and when needed.
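The new branching distinguishes a '%' used only as a leading or trailing wildcard from one embedded inside the name. Because table_name arrives as a single-quoted literal, the first [1:-1] strips the quotes and the second strips a possible leading/trailing '%', so any '%' that survives must be interior. A self-contained restatement of just that test:

def has_inner_wildcard(quoted_name):
    # quoted_name is a single-quoted literal, e.g. "'%abc%'".
    # First [1:-1] strips the quotes; second [1:-1] strips a possible
    # leading/trailing '%', leaving only interior characters.
    return '%' in quoted_name[1:-1][1:-1]

for name in ("'ab%c'", "'%a%bc'", "'%abc'", "'abc%'", "'%abc%'"):
    print(name, has_inner_wildcard(name))
# 'ab%c' and '%a%bc' keep the plain LIKE predicate (True);
# '%abc', 'abc%' and '%abc%' take the LIKE + POSITION branch (False).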
teradataml/common/utils.py

@@ -13,6 +13,7 @@ by other classes which can be reused according to the need.
 Add all the common functions in this class like creating temporary table names, getting
 the datatypes etc.
 """
+from inspect import getsource
 import json
 import uuid
 from math import floor
@@ -43,6 +44,7 @@ from teradataml.options.configure import configure
 from teradataml.options.display import display
 from teradataml.common.constants import TeradataReservedKeywords, TeradataConstants

+from teradataml.utils.internal_buffer import _InternalBuffer
 from teradatasqlalchemy.types import _TDType
 from teradatasqlalchemy.types import (INTEGER, SMALLINT, BIGINT, BYTEINT,
                                       DECIMAL, FLOAT, NUMBER)
@@ -1249,6 +1251,41 @@ class UtilFuncs():
             return UtilFuncs._teradata_quote_arg(keyword, "\"", False)

         return keyword
+
+    def _contains_space(item):
+        """
+        Check if the specified string in item has spaces or tabs in it.
+
+        PARAMETERS:
+            item:
+                Required Argument.
+                Specifies a string to check for spaces or tabs.
+                Types: str
+
+        RETURNS:
+            True, if the specified string has spaces or tabs in it, else False.
+
+        RAISES:
+            None.
+
+        EXAMPLES:
+            # Passing column name with spaces returns True.
+            is_space = UtilFuncs._contains_space("col name")
+            print(is_space)
+            # Passing column name without spaces returns False.
+            is_space = UtilFuncs._contains_space("colname")
+            print(is_space)
+        """
+        # Check if the input is a string and look for spaces or tabs
+        if isinstance(item, str):
+            return any(char in {' ', '\t'} for char in item)
+
+        # If the input is a list, check each element
+        if isinstance(item, list):
+            # Check each item in the list
+            return any(UtilFuncs._contains_space(col) for col in item)
+
+        return False

     @staticmethod
     def _in_schema(schema_name, table_name):
@@ -2358,8 +2395,16 @@ class UtilFuncs():
        >>> self._is_lake()
        """

-        from teradataml.context.context import _get_database_version
-        return int(_get_database_version().split(".")[0]) >= 20
+        tbl_operator = configure.table_operator.lower() \
+            if configure.table_operator else None
+
+        # If the user does not provide a table_operator, check the database version
+        # and determine the system type accordingly.
+        if tbl_operator is None:
+            from teradataml.context.context import _get_database_version
+            return int(_get_database_version().split(".")[0]) >= 20
+
+        return tbl_operator == "apply"

     @staticmethod
     def _get_python_execution_path():
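With this change, configure.table_operator, when set, decides the platform outright, and the database version is consulted only as a fallback. A condensed standalone restatement of the decision (the version string format shown is an assumption):

def is_lake(table_operator, database_version):
    # table_operator: the user-set configure option, or None when unset;
    # database_version: a dotted version string such as "20.00.01".
    if table_operator is None:
        return int(database_version.split(".")[0]) >= 20
    # Any value other than "apply" is treated as a non-Lake system.
    return table_operator.lower() == "apply"

print(is_lake(None, "17.20.03"))   # False - version below 20
print(is_lake(None, "20.00.01"))   # True  - version 20 or later
print(is_lake("apply", "17.20"))   # True  - explicit override wins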
@@ -2379,8 +2424,8 @@ class UtilFuncs():
         EXAMPLES:
            >>> self._get_python_execution_path()
         """
-        # 'indb_install_location' expects python installation directory path.
-        # Hence, postfixing python binary path.
+        # 'indb_install_location' expects python installation directory path.
+        # Hence, postfixing python binary path.
         return "python" if UtilFuncs._is_lake() else \
             '{}/bin/python3'.format(configure.indb_install_location)

@@ -2403,7 +2448,7 @@ class UtilFuncs():
        """
         db_name = UtilFuncs._teradata_unquote_arg(UtilFuncs._extract_db_name(tablename), "\"")
         table_view_name = UtilFuncs._teradata_unquote_arg(UtilFuncs._extract_table_name(tablename), "\"")
-        query = SQLBundle._build_select_table_kind(db_name, "{0}".format(table_view_name), "'V'")
+        query = SQLBundle._build_select_table_kind(db_name, "'{0}'".format(table_view_name), "'V'")

         df = UtilFuncs._execute_query(query)
         if len(df) > 0:
@@ -2471,7 +2516,7 @@ class UtilFuncs():
         except Exception as exc:
             raise exc

-    def _get_env_name(col):
+    def _get_env_name(col=None):
        """
        DESCRIPTION:
            Internal function to get the env name if passed with ColumnExpression
@@ -2479,9 +2524,10 @@ class UtilFuncs():

        PARAMETERS:
            col:
-               Required Argument.
+               Optional Argument.
                Specifies teradataml DataFrame ColumnExpression.
                Types: teradataml DataFrame ColumnExpression
+               Default Value: None

        RAISES:
            None.
@@ -2493,10 +2539,10 @@ class UtilFuncs():
            >>> self._get_env_name(col)
        """

-        # If env_name is passed with ColumnExpression fetch the env name,
+        # If ColumnExpression is passed and env_name is passed with it fetch the env name,
         # else check if default "openml_user_env" env is configured or not,
         # else get the default "openml_env" env if exists or create new deafult env.
-        if col._env_name is not None:
+        if col and col._env_name is not None:
             from teradataml.scriptmgmt.UserEnv import UserEnv
             env = col._env_name
             env_name = env.env_name if isinstance(col._env_name, UserEnv) else env
@@ -2506,5 +2552,84 @@ class UtilFuncs():
             env_name = UtilFuncs._create_or_get_env("open_source_ml.json").env_name
         return env_name

+    def _func_to_string(user_functions):
+        """
+        DESCRIPTION:
+            Internal function to get the user functions in a single string format.
+
+        PARAMETERS:
+            user_functions:
+                Required Argument.
+                List of user functions.
+                Types: list
+
+        RAISES:
+            None.
+
+        RETURNS:
+            string
+
+        EXAMPLES:
+            >>> from teradataml.dataframe.functions import udf
+            >>> @udf(returns=VARCHAR())
+            ... def sum(x, y):
+            ...     return x+y
+            >>>
+            >>> def to_upper(s):
+            ...     return s.upper()
+            >>> user_functions = [sum(1,2)._udf, to_upper]
+            >>> res = self._func_to_string(user_functions)
+            >>> print(res)
+            def sum(x, y):
+                return x+y
+
+            def to_upper(s):
+                return s.upper()
+
+            >>>
+        """
+        user_function_code = ""
+        for func in user_functions:
+            # Get the source code of the user function.
+            func = getsource(func)
+            # If the function have any extra space in the beginning remove it.
+            func = func.lstrip()
+            # Function can have decorator,e.g. udf as decorator, remove it.
+            if func.startswith("@"):
+                func = func[func.find("\n")+1: ].lstrip()
+            # If multiple functions are passed, separate them with new line.
+            user_function_code += func + '\n'
+        return user_function_code
+
+    @staticmethod
+    def _get_qualified_table_name(schema_name, table_name):
+        """
+        DESCRIPTION:
+            Internal function to get the fully qualified name of table.
+
+        PARAMETERS:
+            schema_name:
+                Required Argument.
+                Specifies the name of the schema.
+                Types: str
+
+            table_name:
+                Required Argument.
+                Specifies the name of the table.
+                Types: str
+
+        RAISES:
+            None.
+
+        RETURNS:
+            string
+
+        EXAMPLES:
+            >>> UtilFuncs._get_qualified_table_name("schema_name", "table_name")
+            '"schema_name"."table_name"'
+        """
+        return '"{}"."{}"'.format(schema_name, table_name)
+
+
 from teradataml.common.aed_utils import AedUtils
 from teradataml.dbutils.filemgr import remove_file
teradataml/context/context.py

@@ -1010,19 +1010,21 @@ def _get_host():
     EXAMPLES:
         _get_host()
     """
-    global td_sqlalchemy_engine
-    return td_sqlalchemy_engine.url.host
+    if td_connection is None:
+        return None
+    else:
+        return td_sqlalchemy_engine.url.host

 def _get_host_ip():
     """
     DESCRIPTION:
-        Function to return the host IP address.
+        Function to return the host IP address or host name associated with the current context.

     PARAMETERS:
         None.

     RETURNS:
-        Host IP address.
+        Host IP address or host name associated with the current context.

     RAISES:
         None.
@@ -1034,21 +1036,29 @@ def _get_host_ip():
     if td_connection is None:
         return None

-    host_ip = _get_host()
+    host = _get_host()
     try:
         # Validate if host_ip is a valid IP address (IPv4 or IPv6)
-        ipaddress.ip_address(host_ip)
+        ipaddress.ip_address(host)
+        return host
     except ValueError:
         # If host is not an IP address, get the IP address by DNS name from _InternalBuffer.
-        host_ip = _InternalBuffer.get('dns_host_ip')
-        if host_ip is None:
-            global td_sqlalchemy_engine
+        dns_host_ip = _InternalBuffer.get('dns_host_ip')
+        if dns_host_ip:
+            return dns_host_ip
+
+        # If DNS host ip not found, resolve the host name to get the IP address.
+        # If there is issue in resolving the host name, it will proceed with DNS host as it is.
+        try:
             # Get the list of addresses(compatible for both IPv4 and IPv6)
-            addr_info = socket.getaddrinfo(td_sqlalchemy_engine.url.host, None)
+            addr_info = socket.getaddrinfo(host, None)
             # Pick the first address from the list
             host_ip = addr_info[0][4][0]
             # Add the DNS host IP to the _InternalBuffer.
             _InternalBuffer.add(dns_host_ip=host_ip)
+        except socket.gaierror:
+            # Use dns host as it is
+            host_ip = host
     return host_ip

 class ContextUtilFuncs():
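The reworked _get_host_ip() resolves in a fixed order: return the host unchanged when it is already an IP literal, then the cached 'dns_host_ip' from _InternalBuffer, then a fresh socket.getaddrinfo() lookup (cached on success), and finally the raw host name when resolution fails. A minimal standalone sketch of the same cascade using only the standard library (a plain dict stands in for _InternalBuffer):

import ipaddress
import socket

_cache = {}  # stand-in for teradataml's _InternalBuffer

def resolve_host_ip(host):
    try:
        ipaddress.ip_address(host)   # already an IPv4/IPv6 literal
        return host
    except ValueError:
        pass
    if _cache.get('dns_host_ip'):    # previously resolved DNS name
        return _cache['dns_host_ip']
    try:
        # getaddrinfo returns (family, type, proto, canonname, sockaddr)
        # tuples; sockaddr[0] is the address, for IPv4 and IPv6 alike.
        ip = socket.getaddrinfo(host, None)[0][4][0]
        _cache['dns_host_ip'] = ip
        return ip
    except socket.gaierror:
        return host                  # fall back to the DNS name as-is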
teradataml/data/SQL_Fundamentals.pdf: Binary file (no textual diff shown).
teradataml/data/dataframe_example.json

@@ -152,6 +152,22 @@
         "item" : "varchar(20)",
         "sku" : "integer",
         "category" : "varchar(20)"
-
-    }
+    },
+    "medical_readings": {
+        "patient_id": "BIGINT",
+        "record_timestamp": "timestamp",
+        "glucose": "BIGINT",
+        "blood_pressure": "BIGINT",
+        "insulin": "BIGINT",
+        "diabetes_pedigree_function": "FLOAT",
+        "outcome": "BIGINT"
+    },
+    "patient_profile": {
+        "patient_id": "BIGINT",
+        "record_timestamp": "timestamp",
+        "pregnancies": "BIGINT",
+        "age": "BIGINT",
+        "bmi": "FLOAT",
+        "skin_thickness": "FLOAT"
+    }
 }
teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py

@@ -131,7 +131,7 @@ def NaiveBayes(data = None, response_column = None, numeric_inputs = None,
         display_analytic_functions()

         # Import function NaiveBayes.
-        from teradataml import NaiveBayes
+        from teradataml import NaiveBayes, Unpivoting

         # Example 1: NaiveBayes function to generate classification model using Dense input.
         NaiveBayes_out = NaiveBayes(data=housing_train, response_column='homestyle',
teradataml/data/docs/sqle/docs_17_20/Shap.py

@@ -1,4 +1,4 @@
-def Shap(data = None, object = None, training_function = "TD_GLM",
+def Shap(data = None, object = None, id_column=None, training_function = "TD_GLM",
          model_type = "Regression", input_columns = None, detailed = False,
          accumulate = None, num_parallel_trees = 1000, num_boost_rounds = 10,
          **generic_arguments):
@@ -19,6 +19,12 @@ def Shap(data = None, object = None, training_function = "TD_GLM",
             Required Argument.
             Specifies the teradataml DataFrame containing the model data.
             Types: teradataml DataFrame
+
+        id_column:
+            Required Argument.
+            Specifies the input data column name that has the unique identifier
+            for each row in the "data".
+            Types: str

         training_function:
             Required Argument.
teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py

@@ -133,10 +133,10 @@ def TDNaiveBayesPredict(data = None, object = None, id_column = None,
         # Check the list of available analytic functions.
         display_analytic_functions()

-        # Import function NaiveBayesPredict.
-        from teradataml import NaiveBayesPredict
+        # Import function TDNaiveBayesPredict.
+        from teradataml import TDNaiveBayesPredict, NaiveBayes, Unpivoting

-        # Example 1: NaiveBayesPredict function to predict the classification label using Dense input.
+        # Example 1: TDNaiveBayesPredict function to predict the classification label using Dense input.
         NaiveBayes_out = NaiveBayes(data=housing_train, response_column='homestyle',
                                     numeric_inputs=['price','lotsize','bedrooms','bathrms','stories','garagepl'],
                                     categorical_inputs=['driveway','recroom','fullbase','gashw','airco','prefarea'])
@@ -152,7 +152,7 @@ def TDNaiveBayesPredict(data = None, object = None, id_column = None,
         # Print the result DataFrame.
         print( NaiveBayesPredict_out.result)

-        # Example 2: NaiveBayesPredict function to predict the classification label using Sparse input.
+        # Example 2: TDNaiveBayesPredict function to predict the classification label using Sparse input.

         # Unpivoting the data for sparse input to naive bayes.
         upvt_train = Unpivoting(data = housing_train, id_column = 'sn',