teradataml 20.0.0.0__py3-none-any.whl → 20.0.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of teradataml might be problematic.

Files changed (108)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +71 -0
  4. teradataml/_version.py +2 -2
  5. teradataml/analytics/analytic_function_executor.py +51 -24
  6. teradataml/analytics/json_parser/utils.py +11 -17
  7. teradataml/automl/__init__.py +103 -48
  8. teradataml/automl/data_preparation.py +55 -37
  9. teradataml/automl/data_transformation.py +131 -69
  10. teradataml/automl/feature_engineering.py +117 -185
  11. teradataml/automl/feature_exploration.py +9 -2
  12. teradataml/automl/model_evaluation.py +13 -25
  13. teradataml/automl/model_training.py +214 -75
  14. teradataml/catalog/model_cataloging_utils.py +1 -1
  15. teradataml/clients/auth_client.py +133 -0
  16. teradataml/common/aed_utils.py +3 -2
  17. teradataml/common/constants.py +11 -6
  18. teradataml/common/garbagecollector.py +5 -0
  19. teradataml/common/messagecodes.py +3 -1
  20. teradataml/common/messages.py +2 -1
  21. teradataml/common/utils.py +6 -0
  22. teradataml/context/context.py +49 -29
  23. teradataml/data/advertising.csv +201 -0
  24. teradataml/data/bank_marketing.csv +11163 -0
  25. teradataml/data/bike_sharing.csv +732 -0
  26. teradataml/data/boston2cols.csv +721 -0
  27. teradataml/data/breast_cancer.csv +570 -0
  28. teradataml/data/customer_segmentation_test.csv +2628 -0
  29. teradataml/data/customer_segmentation_train.csv +8069 -0
  30. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +3 -1
  31. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +6 -0
  32. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +5 -1
  33. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +61 -1
  34. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +2 -0
  35. teradataml/data/docs/sqle/docs_17_20/FTest.py +105 -26
  36. teradataml/data/docs/sqle/docs_17_20/GLM.py +162 -1
  37. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +5 -3
  38. teradataml/data/docs/sqle/docs_17_20/KMeans.py +48 -1
  39. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +3 -2
  40. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +5 -0
  41. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +6 -0
  42. teradataml/data/docs/sqle/docs_17_20/ROC.py +3 -2
  43. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +13 -2
  44. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +119 -1
  45. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +93 -1
  46. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +163 -1
  47. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +12 -4
  48. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +7 -1
  49. teradataml/data/docs/sqle/docs_17_20/ZTest.py +72 -7
  50. teradataml/data/glm_example.json +28 -1
  51. teradataml/data/housing_train_segment.csv +201 -0
  52. teradataml/data/insect2Cols.csv +61 -0
  53. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +99 -27
  54. teradataml/data/jsons/sqle/17.20/TD_FTest.json +166 -83
  55. teradataml/data/jsons/sqle/17.20/TD_GLM.json +90 -14
  56. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +48 -5
  57. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +5 -3
  58. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +31 -11
  59. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +3 -2
  60. teradataml/data/jsons/sqle/17.20/TD_ROC.json +2 -1
  61. teradataml/data/jsons/sqle/17.20/TD_SVM.json +16 -16
  62. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +19 -1
  63. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +168 -15
  64. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +50 -1
  65. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +25 -7
  66. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +17 -4
  67. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +157 -80
  68. teradataml/data/kmeans_example.json +5 -0
  69. teradataml/data/kmeans_table.csv +10 -0
  70. teradataml/data/onehot_encoder_train.csv +4 -0
  71. teradataml/data/openml_example.json +29 -0
  72. teradataml/data/scale_attributes.csv +3 -0
  73. teradataml/data/scale_example.json +52 -1
  74. teradataml/data/scale_input_part_sparse.csv +31 -0
  75. teradataml/data/scale_input_partitioned.csv +16 -0
  76. teradataml/data/scale_input_sparse.csv +11 -0
  77. teradataml/data/scale_parameters.csv +3 -0
  78. teradataml/data/scripts/deploy_script.py +20 -1
  79. teradataml/data/scripts/sklearn/sklearn_fit.py +23 -27
  80. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +20 -28
  81. teradataml/data/scripts/sklearn/sklearn_function.template +13 -18
  82. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +23 -33
  83. teradataml/data/scripts/sklearn/sklearn_neighbors.py +18 -27
  84. teradataml/data/scripts/sklearn/sklearn_score.py +20 -29
  85. teradataml/data/scripts/sklearn/sklearn_transform.py +30 -38
  86. teradataml/data/teradataml_example.json +77 -0
  87. teradataml/data/ztest_example.json +16 -0
  88. teradataml/dataframe/copy_to.py +8 -3
  89. teradataml/dataframe/data_transfer.py +120 -61
  90. teradataml/dataframe/dataframe.py +102 -17
  91. teradataml/dataframe/dataframe_utils.py +47 -9
  92. teradataml/dataframe/fastload.py +272 -89
  93. teradataml/dataframe/sql.py +84 -0
  94. teradataml/dbutils/dbutils.py +2 -2
  95. teradataml/lib/aed_0_1.dll +0 -0
  96. teradataml/opensource/sklearn/_sklearn_wrapper.py +102 -55
  97. teradataml/options/__init__.py +13 -4
  98. teradataml/options/configure.py +27 -6
  99. teradataml/scriptmgmt/UserEnv.py +19 -16
  100. teradataml/scriptmgmt/lls_utils.py +117 -14
  101. teradataml/table_operators/Script.py +2 -3
  102. teradataml/table_operators/TableOperator.py +58 -10
  103. teradataml/utils/validators.py +40 -2
  104. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/METADATA +78 -6
  105. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/RECORD +108 -90
  106. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/WHEEL +0 -0
  107. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/top_level.txt +0 -0
  108. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.1.dist-info}/zip-safe +0 -0
@@ -1618,6 +1618,8 @@ class _ArithmeticColumnExpression(ColumnExpression):
     def __sub__(self, other):
         """
         Compute the difference between two ColumnExpressions using -
+        Note:
+            * Difference between two timestamp columns return value in seconds.
 
         PARAMETERS:
             other:
@@ -1644,6 +1646,15 @@ class _ArithmeticColumnExpression(ColumnExpression):
            2   67/06/30   07/07/10   421.0   465.0   179.0
            3   67/06/30   07/07/10   434.0   485.0   185.0
            5   67/06/30   07/07/10   459.0   509.0   211.0
+           >>> load_example_data("uaf", "Convolve2RealsLeft")
+           >>> timestamp_df = DataFrame("Convolve2RealsLeft")
+           >>> timestamp_df
+              row_seq                  row_i_time  col_seq               column_i_time    A     B     C     D
+           id
+           1        1  2018-08-08 08:02:00.000000        0  2018-08-08 08:00:00.000000  1.3  10.3  20.3  30.3
+           1        1  2018-08-08 08:02:00.000000        1  2018-08-08 08:02:00.000000  1.4  10.4  20.4  30.4
+           1        0  2018-08-08 08:00:00.000000        1  2018-08-08 08:02:00.000000  1.2  10.2  20.2  30.2
+           1        0  2018-08-08 08:00:00.000000        0  2018-08-08 08:00:00.000000  1.1  10.1  20.1  30.1
 
            # Example 1: Subtract 100 from the income amount and assign the final amount
            # to new column 'remaining_income'.
@@ -1666,7 +1677,26 @@ class _ArithmeticColumnExpression(ColumnExpression):
            1   67/06/30   07/07/10   415.0   451.0   180.0   271.0
            5   67/06/30   07/07/10   459.0   509.0   211.0   298.0
            4   67/06/30   07/07/10   448.0   493.0   192.0   301.0
+
+           # Example 3: Subtract 2 timestamp columns and assign to new column 'seconds'.
+           >>> timestamp_df.assign(seconds = timestamp_df.row_i_time-timestamp_df.column_i_time)
+              row_seq                  row_i_time  col_seq               column_i_time    A     B     C     D  seconds
+           id
+           1        1  2018-08-08 08:02:00.000000        0  2018-08-08 08:00:00.000000  1.3  10.3  20.3  30.3    120.0
+           1        1  2018-08-08 08:02:00.000000        1  2018-08-08 08:02:00.000000  1.4  10.4  20.4  30.4      0.0
+           1        0  2018-08-08 08:00:00.000000        1  2018-08-08 08:02:00.000000  1.2  10.2  20.2  30.2   -120.0
+           1        0  2018-08-08 08:00:00.000000        0  2018-08-08 08:00:00.000000  1.1  10.1  20.1  30.1      0.0
+
         """
+        if isinstance(self._type, TIMESTAMP) and isinstance(other._type, TIMESTAMP):
+            s = """
+            (CAST((CAST({0} AS DATE)-CAST({1} AS DATE)) AS FLOAT) * 86400) +
+            ((EXTRACT(HOUR FROM {0}) - EXTRACT(HOUR FROM {1})) * 3600) +
+            ((EXTRACT(MINUTE FROM {0}) - EXTRACT(MINUTE FROM {1})) * 60) +
+            ((EXTRACT(SECOND FROM {0}) - EXTRACT(SECOND FROM {1})))
+            """.format(self.compile(), other.compile())
+            return _SQLColumnExpression(literal_column(s, type_ = FLOAT))
+
         expr = other.expression if isinstance(other, _SQLColumnExpression) else other
         res = _SQLColumnExpression(self.expression - expr)
         return res
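
The new TIMESTAMP branch builds the seconds difference from a whole-day delta plus hour, minute and second EXTRACT terms. A minimal standalone sketch of the same arithmetic with plain Python datetimes (the two values below are hypothetical, not taken from the diff):

    from datetime import datetime

    # Hypothetical stand-ins for two TIMESTAMP column values.
    t1 = datetime(2018, 8, 8, 8, 2, 0)
    t2 = datetime(2018, 8, 8, 8, 0, 0)

    # Same decomposition the generated SQL uses: day delta in seconds,
    # plus hour/minute/second component deltas.
    seconds = ((t1.date() - t2.date()).days * 86400
               + (t1.hour - t2.hour) * 3600
               + (t1.minute - t2.minute) * 60
               + (t1.second - t2.second))
    print(seconds)  # 120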
@@ -5437,6 +5467,8 @@ class _SQLColumnExpression(_LogicalColumnExpression,
         # eg: df1.col1, df2.col2
         self.__has_multiple_dataframes = False
         self.__names = []
+        self.alias_name = self.compile()
+
 
     @property
     def expression(self):
@@ -10088,3 +10120,55 @@ class _SQLColumnExpression(_LogicalColumnExpression,
             return list(set(result))
 
         return []
+
+    def alias(self, name):
+        """
+        DESCRIPTION:
+            Function to returns this column with aliased name.
+
+        PARAMETERS:
+            name:
+                Required Argument.
+                Specifies the column name.
+                Type: str
+
+        RAISES:
+            TypeError, ValueError
+
+        RETURNS:
+            ColumnExpression
+
+        EXAMPLES:
+            # Load the data to run the example.
+            >>> load_example_data("dataframe", "admissions_train")
+
+            # Create a DataFrame on 'admissions_train' table.
+            >>> df = DataFrame("admissions_train")
+            >>> df
+               masters   gpa     stats programming  admitted
+            id
+            38     yes  2.65  Advanced    Beginner         1
+            7      yes  2.33    Novice      Novice         1
+            26     yes  3.57  Advanced    Advanced         1
+            5       no  3.44    Novice      Novice         0
+            3       no  3.70    Novice    Beginner         1
+            22     yes  3.46    Novice    Beginner         0
+            24      no  1.87  Advanced      Novice         1
+            36      no  3.00  Advanced      Novice         0
+            19     yes  1.98  Advanced    Advanced         0
+            40     yes  3.95    Novice    Beginner         0
+
+            # Example 1: Alias the resultant column after aggregation with "count_program".
+            >>> res = df.agg(df.programming.count().alias("count_program"))
+            >>> res
+               count_program
+            0             40
+
+        """
+
+        # Validate argument types
+        arg_type_matrix = [["name", name , True, (str), True]]
+        _Validators._validate_function_arguments(arg_type_matrix)
+
+        self.alias_name = name
+        return self
@@ -737,7 +737,7 @@ def _check_if_python_packages_installed():
     """
     # Check if Python interpreter and add-ons packages are installed or not.
     try:
-        query = TableOperatorConstants.CHECK_PYTHON_INSTALLED.value
+        query = TableOperatorConstants.CHECK_PYTHON_INSTALLED.value.format(configure.indb_install_location)
         UtilFuncs._execute_query(query=query)
 
         # If query execution is successful, then Python and add-on packages are
@@ -841,7 +841,7 @@ def db_python_package_details(names=None):
         package_str = "grep -E \"{0}\" | ".format(package_str)
 
     query = TableOperatorConstants.PACKAGE_VERSION_QUERY.value. \
-        format(package_str, configure.default_varchar_size)
+        format(configure.indb_install_location, package_str, configure.default_varchar_size)
 
     ret_val = tdmldf.dataframe.DataFrame.from_query(query)
 
Binary file
@@ -76,7 +76,7 @@ class _GenericObjectWrapper:
         self.modelObj = None
         self._model_data = None
 
-        self._tdml_tmp_dir = os.path.join(os.path.expanduser("~"), ".teradataml")
+        self._tdml_tmp_dir = GarbageCollector._get_temp_dir_name()
 
         self._env = None
 
@@ -212,27 +212,40 @@ class _GenericObjectWrapper:
                 f"Script file '{file_name}' failed to get installed/replaced in Vantage."
             )
 
-    def _get_partition_col_indices_and_types(self, data, partition_columns):
+    def _get_data_col_types_and_partition_col_indices_and_types(self, data, partition_columns,
+                                                                idx_delim=",",
+                                                                types_delim="--"):
         """
-        partition_columns can be from feature columns and label columns.
-        So, get the indices and types of these columns from the data columns.
+        Internal function to get the data column types and partition column names, indices and types.
+        Function returns delimiter separated string of types and indices if idx_delim and
+        types_delim are provided. Otherwise, it returns list of types and indices. Partition names
+        are returned as list always.
         """
-        partition_indices = []
-        partition_types = []
+        data_column_types = "" if types_delim else []
+        partition_indices = "" if idx_delim else []
+        partition_types = "" if types_delim else []
         new_partition_columns = []
+        j = 0
         for i, col in enumerate(data.columns):
+            _type = data._td_column_names_and_sqlalchemy_types[col.lower()].python_type.__name__
+            if types_delim:
+                data_column_types += (_type if i == 0 else f"{types_delim}{_type}")
+            else:
+                data_column_types.append(_type)
             if col in partition_columns:
                 new_partition_columns.append(col)
-                partition_indices.append(i)
-                partition_types.append(data._td_column_names_and_sqlalchemy_types[col.lower()].\
-                    python_type.__name__)
-        # Converting to string "None" if they are not present as empty string can't be passed
-        # to Script script_commands' command line arguments.
-        # Otherwise, pass the values as comma separated string.
-        partition_indices = ",".join([str(x) for x in partition_indices])\
-            if partition_indices else "None"
-        partition_types = ",".join([x for x in partition_types]) if partition_types else "None"
-        return partition_indices, partition_types, new_partition_columns
+                if idx_delim:
+                    partition_indices += (str(i) if j == 0 else f"{idx_delim}{str(i)}")
+                else:
+                    partition_indices.append(i)
+                if types_delim:
+                    partition_types += (_type if j == 0 else f"{types_delim}{_type}")
+                else:
+                    partition_types.append(_type)
+                j += 1
+        # Return types of all columns (as list or str), partition column indices (as list or str)
+        # and partition column types (as list or str).
+        return data_column_types, partition_indices, partition_types, new_partition_columns
 
     def _get_kwargs_str(self, kwargs):
         """
@@ -825,15 +838,15 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         else:
             classes = str(None)
             class_type = str(None)
-
-        partition_indices, partition_types, new_partition_columns = \
-            self._get_partition_col_indices_and_types(data, new_partition_columns)
+
+        data_column_types_str, partition_indices_str, _, new_partition_columns = \
+            self._get_data_col_types_and_partition_col_indices_and_types(data, new_partition_columns)
 
         # db_name is applicable for enterprise system.
         db_file_name = file_name if self._is_lake_system else f"./{self._db_name}/{file_name}"
         py_exc = UtilFuncs._get_python_execution_path()
         script_command = f"{py_exc} {db_file_name} {func} {len(feature_columns)} "\
-                         f"{len(label_columns)} {partition_indices} {partition_types} "\
+                         f"{len(label_columns)} {partition_indices_str} {data_column_types_str} "\
                          f"{self._model_file_name_prefix} {classes} {class_type} {self._is_lake_system}"
 
         # Get unique values in partitioning columns.
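
For context, the Script command line assembled here now carries the partition column indices followed by the "--"-separated types of all data columns. A hypothetical rendering of that command (every value below is invented purely for illustration):

    # Hypothetical values, only to show the shape of the composed command.
    py_exc = "tdpython3"
    db_file_name = "./alice/sklearn_fit.py"
    func = "fit"
    feature_columns = ["sepal_length", "sepal_width"]
    label_columns = ["species"]
    partition_indices_str = "3"                       # index of the partition column
    data_column_types_str = "float--float--str--int"  # types of all data columns
    model_file_name_prefix = "td_sklearn_model"
    classes, class_type, is_lake_system = str(None), str(None), False

    script_command = (f"{py_exc} {db_file_name} {func} {len(feature_columns)} "
                      f"{len(label_columns)} {partition_indices_str} {data_column_types_str} "
                      f"{model_file_name_prefix} {classes} {class_type} {is_lake_system}")
    print(script_command)
    # tdpython3 ./alice/sklearn_fit.py fit 2 1 3 float--float--str--int td_sklearn_model None None False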
@@ -972,7 +985,6 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
                                         feature_columns,
                                         label_columns,
                                         func_name,
-                                        n_partitions,
                                         kwargs):
         """
         Internal function to return list of column names and their sqlalchemy types
@@ -1010,7 +1022,7 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
 
         # For paritioning columns, it will be a dataframe and getattr(modelObj, func_name) fails.
         # Just for getting the number of columns and their types, using only one model of all.
-        if n_partitions == 1:
+        if len(self._fit_partition_unique_values) == 1:
             # Single model case.
             skl_obj = self.modelObj
         else:
@@ -1038,11 +1050,10 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
                 "path() returns tuple of ndarrays of different shapes. Not Implemented yet."
             )
 
-        # This import is as per scipy version 1.10.x in local machine as teradataml does not
-        # impose restrictions on this package in setup.py. TODO
-        from scipy.sparse import csr_matrix
-
-        if isinstance(trans_opt, csr_matrix):
+        if isinstance(trans_opt, numpy.ndarray) and trans_opt.shape == (X.shape[0],):
+            trans_opt = trans_opt.reshape(X.shape[0], 1)
+
+        if type(trans_opt).__name__ in ["csr_matrix", "csc_matrix"]:
             no_of_columns = trans_opt.get_shape()[1]
             trans_opt = trans_opt.toarray()
         elif isinstance(trans_opt, dict):
@@ -1054,6 +1065,14 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         else:
             no_of_columns = 1
 
+        # Special handling when inverse_transform of no_of_columns returns no of rows
+        # less than the no of classes. Such columns are filled with NaN values.
+        # Updating number of columns here (new columns with NaN values will be added).
+        if func_name == "inverse_transform" and self.class_name == "MultiLabelBinarizer":
+            no_of_columns = len(self.classes_)
+            for i in range(len(ten_row_data)):
+                trans_opt[i] += tuple([numpy.nan] * (no_of_columns - len(trans_opt[i])))
+
         # Special handling required for cross_decomposition classes's transform function, which
         # takes label columns also. In this case, output is a tuple of numpy arrays - x_scores and
         # y_scores. If label columns are not provided, only x_scores are returned.
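
The MultiLabelBinarizer special case pads every inverse_transform output tuple to the number of fitted classes with NaN so each row has the same width. A minimal standalone sketch of that padding (the sample tuples are hypothetical):

    import numpy

    # Hypothetical inverse_transform output: one tuple of labels per row.
    trans_opt = [("red", "blue"), ("green",), ()]
    no_of_columns = 3  # e.g. len(classes_)

    # Pad every tuple with NaN so all rows end up with no_of_columns entries.
    for i in range(len(trans_opt)):
        trans_opt[i] += tuple([numpy.nan] * (no_of_columns - len(trans_opt[i])))

    print(trans_opt)
    # [('red', 'blue', nan), ('green', nan, nan), (nan, nan, nan)]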
@@ -1084,6 +1103,30 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         # Get new column sqlalchemy types for pandas df columns of transform output.
         opt_pd = pd.DataFrame(trans_opt)
 
+        # Get output column types for each column in pandas df from the output of transform
+        # type functions.
+        types = {}
+        for idx, col in enumerate(list(opt_pd.columns)):
+            # Get type of column using data from all rows, in case if the column has None values.
+            # 'and' of types of all values in the column with type(None) gives the type of the column.
+            type_ = type(None)
+            for i in range(len(trans_opt)):
+                type_ = type_ and type(trans_opt[i][idx])
+
+            # If all the values of the output (trans_opt) is None, thelen use `str` as type since
+            # pandas astype() does not accept None type.
+            if type_ is type(None):
+                type_ = str
+
+            # numpy integer columns with nan values can't be typecasted using pd.astype() to int64.
+            # It raises error like "Cannot convert non-finite values (NA or inf) to integer:
+            # Error while type casting for column '2'"
+            # Hence, using pd.Int64Dtype() for integer columns with nan values.
+            types[col] = type_ if type_ != numpy.int64 else pd.Int64Dtype()
+
+        # Without this, all columns will be of object type and gets converted to VARCHAR in Vantage.
+        opt_pd = opt_pd.astype(types)
+
         # If the datatype is not specified then check if the datatype is datetime64 and timezone is present then map it to
         # TIMESTAMP(timezone=True) else map it according to default value.
         col_types = [TIMESTAMP(timezone=True)
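
The added block infers a dtype per output column and switches integer columns that contain NaN to pandas' nullable Int64 dtype, since casting non-finite values to numpy.int64 raises. A small standalone illustration of that choice (the sample data is hypothetical):

    import numpy
    import pandas as pd

    # Hypothetical transform output with an integer column containing a missing value.
    trans_opt = [(1, "a"), (numpy.nan, "b"), (3, "c")]
    opt_pd = pd.DataFrame(trans_opt)

    # astype(numpy.int64) would raise "Cannot convert non-finite values (NA or inf) to integer",
    # so use the nullable integer dtype for column 0 instead.
    opt_pd = opt_pd.astype({0: pd.Int64Dtype(), 1: str})
    print(opt_pd.dtypes.tolist())  # [Int64Dtype(), dtype('O')]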
@@ -1123,14 +1166,14 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         script_file_path = f"{file_name}" if self._is_lake_system \
             else f"./{self._db_name}/{file_name}"
 
-        partition_indices, partition_types, new_partition_columns = \
-            self._get_partition_col_indices_and_types(data, new_partition_columns)
+        data_column_types_str, partition_indices_str, _, new_partition_columns = \
+            self._get_data_col_types_and_partition_col_indices_and_types(data, new_partition_columns)
 
         self._validate_unique_partition_values(data, new_partition_columns)
 
         py_exc = UtilFuncs._get_python_execution_path()
         script_command = f"{py_exc} {script_file_path} {func_name} {len(feature_columns)} "\
-                         f"{len(label_columns)} {partition_indices} {partition_types} "\
+                         f"{len(label_columns)} {partition_indices_str} {data_column_types_str} "\
                          f"{self._model_file_name_prefix} {self._is_lake_system}"
 
         # score, aic, bic returns float values.
@@ -1191,14 +1234,14 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         script_file_path = f"{file_name}" if self._is_lake_system \
             else f"./{self._db_name}/{file_name}"
 
-        partition_indices, partition_types, new_partition_columns = \
-            self._get_partition_col_indices_and_types(data, new_partition_columns)
+        data_column_types_str, partition_indices_str, _, new_partition_columns = \
+            self._get_data_col_types_and_partition_col_indices_and_types(data, new_partition_columns)
 
         self._validate_unique_partition_values(data, new_partition_columns)
 
         py_exc = UtilFuncs._get_python_execution_path()
         script_command = f"{py_exc} {script_file_path} {func_name} {len(feature_columns)} "\
-                         f"{len(label_columns)} {partition_indices} {partition_types} "\
+                         f"{len(label_columns)} {partition_indices_str} {data_column_types_str} "\
                          f"{self._model_file_name_prefix} {self._is_lake_system}"
 
         # Returning feature columns also along with transformed columns because we don't know the
@@ -1212,7 +1255,6 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
                                             feature_columns,
                                             label_columns,
                                             func_name,
-                                            n_partitions,
                                             kwargs)
 
         # Installing model files before running sklearn_transform.py.
@@ -1253,7 +1295,6 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
                                                 feature_columns,
                                                 label_columns,
                                                 func_name,
-                                                len(new_partition_columns),
                                                 {})
         else:
             # If there are no label_columns, we will have only one
@@ -1263,14 +1304,14 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
             file_name = "sklearn_fit_predict.py"
             self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
 
-            partition_indices, partition_types, new_partition_columns = \
-                self._get_partition_col_indices_and_types(data, new_partition_columns)
+            data_column_types_str, partition_indices_str, _, new_partition_columns = \
+                self._get_data_col_types_and_partition_col_indices_and_types(data, new_partition_columns)
 
             script_file_name = f"{file_name}" if self._is_lake_system \
                 else f"./{self._db_name}/{file_name}"
             py_exc = UtilFuncs._get_python_execution_path()
             script_command = f"{py_exc} {script_file_name} {len(feature_columns)} "\
-                             f"{len(label_columns)} {partition_indices} {partition_types} "\
+                             f"{len(label_columns)} {partition_indices_str} {data_column_types_str} "\
                              f"{self._model_file_name_prefix} {self._is_lake_system}"
 
             # Get unique values in partitioning columns.
@@ -1377,12 +1418,12 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         else:
             return_types += [("output", VARCHAR())]
 
-        partition_indices, partition_types, new_partition_columns = \
-            self._get_partition_col_indices_and_types(data, new_partition_columns)
+        data_column_types_str, partition_indices_str, _, new_partition_columns = \
+            self._get_data_col_types_and_partition_col_indices_and_types(data, new_partition_columns)
 
         py_exc = UtilFuncs._get_python_execution_path()
         script_command = f"{py_exc} {script_file_path} {func_name} {len(feature_columns)} "\
-                         f"{partition_indices} {partition_types} {self._model_file_name_prefix} {self._is_lake_system} "\
+                         f"{partition_indices_str} {data_column_types_str} {self._model_file_name_prefix} {self._is_lake_system} "\
                          f"{args_str}"
 
         # Get unique values in partitioning columns.
@@ -1496,12 +1537,12 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
         return_types = [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
                         for col in new_partition_columns] + return_types
 
-        partition_indices, partition_types, new_partition_columns = \
-            self._get_partition_col_indices_and_types(data, new_partition_columns)
+        data_column_types_str, partition_indices_str, _, new_partition_columns = \
+            self._get_data_col_types_and_partition_col_indices_and_types(data, new_partition_columns)
 
         py_exc = UtilFuncs._get_python_execution_path()
         script_command = f"{py_exc} {script_file_path} {func_name} {len(feature_columns)} "\
-                         f"{len(label_columns)} {len(group_columns)} {partition_indices} {partition_types} "\
+                         f"{len(label_columns)} {len(group_columns)} {partition_indices_str} {data_column_types_str} "\
                          f"{self._model_file_name_prefix} {self._is_lake_system}"
 
         # Get unique values in partitioning columns.
@@ -1586,16 +1627,14 @@ class _SKLearnFunctionWrapper(_GenericObjectWrapper):
 
         self.__params = kwargs
 
-        # Get indices and types of partition_columns.
-        idxs, types, partition_cols = self._get_partition_col_indices_and_types(self.__tdml_df,
-                                                                                partition_cols)
+        # Get indices of partition_columns and types of all columns.
+        data_column_types_str, partition_indices_str, _, partition_cols = \
+            self._get_data_col_types_and_partition_col_indices_and_types(self.__tdml_df, partition_cols)
 
         script_file_path = f"{self._model_file_name}" if self._is_lake_system \
             else f"./{self._db_name}/{self._model_file_name}"
         py_exc = UtilFuncs._get_python_execution_path()
-        script_command = (f"{py_exc} {script_file_path} {idxs}"
-                          f" ") + \
-                         f"{types} {data_args_str}"
+        script_command = f"{py_exc} {script_file_path} {partition_indices_str} {data_column_types_str} {data_args_str}"
 
         return_types = [(col, self.__tdml_df._td_column_names_and_sqlalchemy_types[col.lower()])
                         for col in partition_cols] + [(self.__func_name, CLOB())]
@@ -1619,17 +1658,25 @@ class _SKLearnFunctionWrapper(_GenericObjectWrapper):
         return self.modelObj
 
     def _prepare_data_args_string(self, kwargs):
+        """
+        Get column indices and types of each data related arguments in the format:
+        "{<arg_name>-<comma separated indices>-<comma separated types>}--
+         {<arg_name>-<comma separated indices>-<comma separated types>}"
+        """
        data_args_str = []
        for arg_name in list(self.__data_args.keys()):
            # Remove DataFrame arguments from kwargs, which will be passed to Script.
            kwargs.pop(arg_name)
 
            # Get column indices and their types for each dataframe from parent dataframe.
-           _indices, _types, _ = self._get_partition_col_indices_and_types(self.__tdml_df,
-                                                                           self.__data_args[arg_name].columns)
-
-           # Format "<arg_name>-<comma separated indices>-<comma separated types>"
-           data_args_str.append(f"{arg_name}-{_indices}-{_types}")
+           _, partition_indices_str, partition_types_str, _ = \
+               self._get_data_col_types_and_partition_col_indices_and_types(self.__tdml_df,
+                                                                            self.__data_args[arg_name].columns,
+                                                                            idx_delim=",",
+                                                                            types_delim=",")
+
+           # Format "<arg_name>-<comma separated indices>-<comma separated types>"
+           data_args_str.append(f"{arg_name}-{partition_indices_str}-{partition_types_str}")
 
        # Format "{<arg_name>-<comma separated indices>-<comma separated types>}--
        # {<arg_name>-<comma separated indices>-<comma separated types>}"
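
The docstring added above spells out the wire format used for data arguments. A standalone sketch that produces a string of that shape from hypothetical column metadata (argument and column names are illustrative):

    def prepare_data_args_string(parent_columns, data_args):
        # parent_columns: ordered mapping of column name -> Python type name.
        # data_args: mapping of argument name -> list of column names it uses.
        parts = []
        for arg_name, cols in data_args.items():
            indices = ",".join(str(i) for i, c in enumerate(parent_columns) if c in cols)
            types = ",".join(t for c, t in parent_columns.items() if c in cols)
            parts.append(f"{arg_name}-{indices}-{types}")
        return "--".join("{" + p + "}" for p in parts)

    parent_columns = {"id": "int", "gpa": "float", "masters": "str"}
    print(prepare_data_args_string(parent_columns, {"X": ["id", "gpa"], "y": ["masters"]}))
    # {X-0,1-int,float}--{y-2-str}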
@@ -21,9 +21,11 @@ def set_config_params(**kwargs):
        auth_token:
            Optional Parameter.
            Specifies the authentication token to connect to VantageCloud Lake.
-           Note:
-               Authentication token will expire after a specific time.
-               One can get the new authentication token and set it again.
+           Notes:
+               * Authentication token will expire after a specific time.
+                 One can get the new authentication token and set it again.
+               * if "auth_token" is set through this function, then this function
+                 should always be used only after create_context.
            Types: str
 
        ues_url:
@@ -77,6 +79,11 @@ def set_config_params(**kwargs):
            The default value is the installation location of In-DB 2.0.0 packages.
            Older versions of In-DB packages are installed at
            "/opt/teradata/languages/Python/".
+
+       local_storage:
+           Specifies the location on client where garbage collector folder will be created.
+           Types: str
+
    RETURNS:
        bool
 
@@ -93,7 +100,8 @@ def set_config_params(**kwargs):
    ...     val_install_location="VAL_USER",
    ...     read_nos_function_mapping="read_nos_fm",
    ...     write_nos_function_mapping="write_nos_fm",
-   ...     indb_install_location="/opt/teradata/languages/Python")
+   ...     indb_install_location="/opt/teradata/languages/Python",
+   ...     local_storage="/Users/gc")
    True
 
    # Example 2: Alternatively, set configuration parameters without using set_config_params() function.
@@ -106,6 +114,7 @@ def set_config_params(**kwargs):
    >>> configure.read_nos_function_mapping="read_nos_fm"
    >>> configure.write_nos_function_mapping="write_nos_fm"
    >>> configure.indb_install_location="/opt/teradata/languages/Python"
+   >>> configure.local_storage = "/Users/gc/"
    """
    for option in kwargs:
        try:
@@ -13,7 +13,6 @@ from teradataml.common.exceptions import TeradataMlException
 from teradataml.common.messages import Messages
 from teradataml.common.messagecodes import MessageCodes
 
-
 class _ConfigureSuper(object):
 
     def __init__(self):
@@ -58,6 +57,7 @@ class _Configure(_ConfigureSuper):
     inline_plot = _create_property('inline_plot')
     indb_install_location = _create_property('indb_install_location')
     openml_user_env = _create_property('openml_user_env')
+    local_storage = _create_property('local_storage')
 
     def __init__(self, default_varchar_size=1024, column_casesensitive_handler = False,
                  vantage_version="vantage1.1", val_install_location=None,
@@ -66,7 +66,7 @@ class _Configure(_ConfigureSuper):
                  read_nos_function_mapping="read_nos", write_nos_function_mapping="write_nos",
                  cran_repositories=None, inline_plot=True,
                  indb_install_location="/var/opt/teradata/languages/sles12sp3/Python/",
-                 openml_user_env=None):
+                 openml_user_env=None, local_storage=None):
 
         """
         PARAMETERS:
@@ -163,6 +163,13 @@ class _Configure(_ConfigureSuper):
                    # Set the environment to be used for OpenML.
                    _env_name = "OpenAF" # Name of the user defined environment.
                    teradataml.options.configure.openml_user_env = get_env(_env_name)
+
+           local_storage:
+               Specifies the location on client where garbage collector folder will be created.
+               Types: string
+               Example:
+                   # Set the garbage collector location to "/Users/gc/"
+                   teradataml.options.configure.local_storage = "/Users/gc/"
        """
        super().__init__()
        super().__setattr__('default_varchar_size', default_varchar_size)
@@ -179,6 +186,7 @@ class _Configure(_ConfigureSuper):
        super().__setattr__('inline_plot', True)
        super().__setattr__('indb_install_location', indb_install_location)
        super().__setattr__('openml_user_env', openml_user_env)
+       super().__setattr__('local_storage', local_storage)
 
        # internal configurations
        # These configurations are internal and should not be
@@ -221,6 +229,12 @@ class _Configure(_ConfigureSuper):
        super().__setattr__('_oauth_client_id', None)
        # Internal parameter, that is used for specifying the Authentication token expiry time.
        super().__setattr__('_auth_token_expiry_time', None)
+       # Internal parameter, that is used for specifying the OAuth authentication.
+       super().__setattr__('_oauth', None)
+       # Internal parameter, that is used for specifying the current database associated with current connection.
+       super().__setattr__('_current_database_name', None)
+       # Internal parameter, that is used for specifying the database username associated with current connection.
+       super().__setattr__('_database_username', None)
 
    def __setattr__(self, name, value):
        if hasattr(self, name):
@@ -243,7 +257,7 @@ class _Configure(_ConfigureSuper):
                                                               "greater than or equal to"),
                                          MessageCodes.TDMLDF_POSITIVE_INT)
        elif name in ['column_casesensitive_handler', '_validate_metaexpression',
-                     '_validate_gc', 'inline_plot']:
+                     '_validate_gc', 'inline_plot', '_oauth']:
 
            if not isinstance(value, bool):
                raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE, name,
@@ -283,15 +297,21 @@ class _Configure(_ConfigureSuper):
                                                               "a value in {}".format(valid_versions)),
                                          MessageCodes.INVALID_ARG_VALUE)
 
-       elif name in ['val_install_location', 'byom_install_location', 'database_version',
+       elif name in ['val_install_location', 'byom_install_location',
                     'read_nos_function_mapping', 'write_nos_function_mapping',
                     '_byom_model_catalog_database', '_byom_model_catalog_table',
                     '_byom_model_catalog_license', '_byom_model_catalog_license_source',
-                    'indb_install_location']:
+                    'indb_install_location', 'local_storage']:
            if not isinstance(value, str):
                raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE, name,
                                                               'str'),
                                          MessageCodes.UNSUPPORTED_DATATYPE)
+           if name == 'local_storage':
+               # Validate if path exists.
+               if not os.path.exists(value):
+                   raise TeradataMlException(
+                       Messages.get_message(MessageCodes.PATH_NOT_FOUND).format(value),
+                       MessageCodes.PATH_NOT_FOUND)
 
        elif name in {'ues_url', '_oauth_end_point', '_oauth_client_id'}:
 
@@ -305,7 +325,8 @@ class _Configure(_ConfigureSuper):
            value = value[: -1] if value.endswith("/") else value
 
        elif name in ['temp_table_database', 'temp_view_database',
-                    "_byom_model_catalog_license_table", "_byom_model_catalog_license_database"]:
+                    "_byom_model_catalog_license_table", "_byom_model_catalog_license_database",
+                    "_current_database_name", "_database_username", "database_version"]:
            if not isinstance(value, str) and not isinstance(value, type(None)):
                raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE, name,
                                                               'str or None'),
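
The new 'local_storage' option goes through the same string validation as the other location options, plus an existence check on the path (a missing path raises PATH_NOT_FOUND). A small usage sketch, with an illustrative path:

    import os
    from teradataml import configure

    path = "/Users/gc"              # illustrative location for the garbage collector folder
    if os.path.exists(path):        # the same check the option setter performs
        configure.local_storage = path
    else:
        print(f"{path} does not exist; assigning it would raise a TeradataMlException")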
@@ -23,6 +23,7 @@ from teradataml import configure
 from teradataml.utils.internal_buffer import _InternalBuffer
 from concurrent.futures import ThreadPoolExecutor, wait
 from teradataml.clients.pkce_client import _DAWorkflow
+from teradataml.clients.auth_client import _AuthWorkflow
 from teradataml.context.context import _get_user
 from teradataml.common.constants import HTTPRequest, CloudProvider
 from teradataml.common.exceptions import TeradataMlException
@@ -219,22 +220,24 @@ def _get_auth_token():
        >>>_get_auth_token()
    """
    # Check the current time. If token is expiring, get another one from refresh token.
-    if configure._auth_token_expiry_time and time.time() > configure._auth_token_expiry_time:
-        # Extract the base URL from "ues_url".
-        ues_url = configure.ues_url
-        client_id = configure._oauth_client_id
-
-        url_parser = urlparse(ues_url)
-        base_url = "{}://{}".format(url_parser.scheme, url_parser.netloc)
-
-        # Get the JWT Token details.
-        da_wf = _DAWorkflow(base_url, client_id)
-        token_data = da_wf._get_token_data()
-
-        # Replace the options with new values.
-        configure._auth_token_expiry_time = time.time() + token_data["expires_in"] - 15
-        # Store the jwt token in internal class attribute.
-        _InternalBuffer.add(auth_token=_AuthToken(token=token_data["access_token"]))
+    if configure._oauth:
+        if configure._auth_token_expiry_time and time.time() > configure._auth_token_expiry_time:
+            # Extract the base URL from "ues_url".
+            ues_url = configure.ues_url
+            client_id = configure._oauth_client_id
+
+            url_parser = urlparse(ues_url)
+            base_url = "{}://{}".format(url_parser.scheme, url_parser.netloc)
+
+            # Get the JWT Token details.
+            da_wf = _DAWorkflow(base_url, client_id)
+            token_data = da_wf._get_token_data()
+
+            # Replace the options with new values.
+            configure._auth_token_expiry_time = time.time() + token_data["expires_in"] - 15
+
+            # Store the jwt token in internal class attribute.
+            _InternalBuffer.add(auth_token=_AuthToken(token=token_data["access_token"]))
 
    return {"Authorization": "Bearer {}".format(_InternalBuffer.get("auth_token").value)}
 
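
With this change a JWT is refreshed only when OAuth is in use and the cached token is close to expiry; the expiry timestamp is recorded 15 seconds early so the refresh happens before the token actually lapses. A simplified, self-contained sketch of that bookkeeping (names other than the 15-second margin are invented; in teradataml the first token is obtained by the PKCE workflow, not by this helper):

    import time

    class TokenCache:
        # Minimal sketch of the expiry handling in _get_auth_token().
        def __init__(self):
            self.token = None
            self.expiry_time = None   # plays the role of configure._auth_token_expiry_time

        def get(self, fetch_token_data):
            # fetch_token_data() stands in for _DAWorkflow._get_token_data() and
            # returns {"access_token": ..., "expires_in": seconds}.
            if self.expiry_time is None or time.time() > self.expiry_time:
                data = fetch_token_data()
                # Record expiry 15 seconds early so the token is replaced before it lapses.
                self.expiry_time = time.time() + data["expires_in"] - 15
                self.token = data["access_token"]
            return {"Authorization": "Bearer {}".format(self.token)}

    cache = TokenCache()
    print(cache.get(lambda: {"access_token": "abc123", "expires_in": 3600}))
    # {'Authorization': 'Bearer abc123'}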