teradataml 20.0.0.1__py3-none-any.whl → 20.0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic. Click here for more details.

Files changed (200)
  1. teradataml/LICENSE.pdf +0 -0
  2. teradataml/README.md +112 -0
  3. teradataml/__init__.py +6 -3
  4. teradataml/_version.py +1 -1
  5. teradataml/analytics/__init__.py +3 -2
  6. teradataml/analytics/analytic_function_executor.py +224 -16
  7. teradataml/analytics/analytic_query_generator.py +92 -0
  8. teradataml/analytics/byom/__init__.py +3 -2
  9. teradataml/analytics/json_parser/metadata.py +1 -0
  10. teradataml/analytics/json_parser/utils.py +6 -4
  11. teradataml/analytics/meta_class.py +40 -1
  12. teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
  13. teradataml/analytics/sqle/__init__.py +10 -2
  14. teradataml/analytics/table_operator/__init__.py +3 -2
  15. teradataml/analytics/uaf/__init__.py +21 -2
  16. teradataml/analytics/utils.py +62 -1
  17. teradataml/analytics/valib.py +1 -1
  18. teradataml/automl/__init__.py +1502 -323
  19. teradataml/automl/custom_json_utils.py +139 -61
  20. teradataml/automl/data_preparation.py +245 -306
  21. teradataml/automl/data_transformation.py +32 -12
  22. teradataml/automl/feature_engineering.py +313 -82
  23. teradataml/automl/model_evaluation.py +44 -35
  24. teradataml/automl/model_training.py +109 -146
  25. teradataml/catalog/byom.py +8 -8
  26. teradataml/clients/pkce_client.py +1 -1
  27. teradataml/common/constants.py +37 -0
  28. teradataml/common/deprecations.py +13 -7
  29. teradataml/common/garbagecollector.py +151 -120
  30. teradataml/common/messagecodes.py +4 -1
  31. teradataml/common/messages.py +2 -1
  32. teradataml/common/sqlbundle.py +1 -1
  33. teradataml/common/utils.py +97 -11
  34. teradataml/common/wrapper_utils.py +1 -1
  35. teradataml/context/context.py +72 -2
  36. teradataml/data/complaints_test_tokenized.csv +353 -0
  37. teradataml/data/complaints_tokens_model.csv +348 -0
  38. teradataml/data/covid_confirm_sd.csv +83 -0
  39. teradataml/data/dataframe_example.json +10 -0
  40. teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
  41. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
  42. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
  43. teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
  44. teradataml/data/docs/sqle/docs_17_20/Shap.py +197 -0
  45. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
  46. teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
  47. teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
  48. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
  49. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
  50. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
  51. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
  52. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
  53. teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
  54. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
  55. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
  56. teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
  57. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
  58. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
  59. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
  60. teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
  61. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
  62. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
  63. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
  64. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
  65. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
  66. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
  67. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
  68. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
  69. teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
  70. teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
  71. teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
  72. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  73. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
  74. teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
  75. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
  76. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
  77. teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
  78. teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
  79. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  80. teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
  81. teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
  82. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
  83. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  84. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
  85. teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
  86. teradataml/data/dwt2d_dataTable.csv +65 -0
  87. teradataml/data/dwt_dataTable.csv +8 -0
  88. teradataml/data/dwt_filterTable.csv +3 -0
  89. teradataml/data/finance_data4.csv +13 -0
  90. teradataml/data/grocery_transaction.csv +19 -0
  91. teradataml/data/idwt2d_dataTable.csv +5 -0
  92. teradataml/data/idwt_dataTable.csv +8 -0
  93. teradataml/data/idwt_filterTable.csv +3 -0
  94. teradataml/data/interval_data.csv +5 -0
  95. teradataml/data/jsons/paired_functions.json +14 -0
  96. teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
  97. teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
  98. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
  99. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
  100. teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
  101. teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
  102. teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
  103. teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
  104. teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
  105. teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
  106. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
  107. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
  108. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
  109. teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
  110. teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
  111. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
  112. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
  113. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
  114. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
  115. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
  116. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
  117. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
  118. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
  119. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
  120. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
  121. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
  122. teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
  123. teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
  124. teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
  125. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
  126. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
  127. teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
  128. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
  129. teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
  130. teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
  131. teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
  132. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
  133. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
  134. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
  135. teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
  136. teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
  137. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
  138. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
  139. teradataml/data/jsons/uaf/17.20/TD_SAX.json +208 -0
  140. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
  141. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
  142. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
  143. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
  144. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
  145. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +400 -0
  146. teradataml/data/load_example_data.py +8 -2
  147. teradataml/data/naivebayestextclassifier_example.json +1 -1
  148. teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
  149. teradataml/data/peppers.png +0 -0
  150. teradataml/data/real_values.csv +14 -0
  151. teradataml/data/sax_example.json +8 -0
  152. teradataml/data/scripts/deploy_script.py +1 -1
  153. teradataml/data/scripts/sklearn/sklearn_fit.py +17 -10
  154. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +2 -2
  155. teradataml/data/scripts/sklearn/sklearn_function.template +30 -7
  156. teradataml/data/scripts/sklearn/sklearn_neighbors.py +1 -1
  157. teradataml/data/scripts/sklearn/sklearn_score.py +12 -3
  158. teradataml/data/scripts/sklearn/sklearn_transform.py +55 -4
  159. teradataml/data/star_pivot.csv +8 -0
  160. teradataml/data/templates/open_source_ml.json +2 -1
  161. teradataml/data/teradataml_example.json +20 -1
  162. teradataml/data/timestamp_data.csv +4 -0
  163. teradataml/data/titanic_dataset_unpivoted.csv +19 -0
  164. teradataml/data/uaf_example.json +55 -1
  165. teradataml/data/unpivot_example.json +15 -0
  166. teradataml/data/url_data.csv +9 -0
  167. teradataml/data/windowdfft.csv +16 -0
  168. teradataml/dataframe/copy_to.py +1 -1
  169. teradataml/dataframe/data_transfer.py +5 -3
  170. teradataml/dataframe/dataframe.py +474 -41
  171. teradataml/dataframe/fastload.py +3 -3
  172. teradataml/dataframe/functions.py +339 -0
  173. teradataml/dataframe/row.py +160 -0
  174. teradataml/dataframe/setop.py +2 -2
  175. teradataml/dataframe/sql.py +658 -20
  176. teradataml/dataframe/window.py +1 -1
  177. teradataml/dbutils/dbutils.py +322 -16
  178. teradataml/geospatial/geodataframe.py +1 -1
  179. teradataml/geospatial/geodataframecolumn.py +1 -1
  180. teradataml/hyperparameter_tuner/optimizer.py +13 -13
  181. teradataml/lib/aed_0_1.dll +0 -0
  182. teradataml/opensource/sklearn/_sklearn_wrapper.py +154 -69
  183. teradataml/options/__init__.py +3 -1
  184. teradataml/options/configure.py +14 -2
  185. teradataml/options/display.py +2 -2
  186. teradataml/plot/axis.py +4 -4
  187. teradataml/scriptmgmt/UserEnv.py +10 -6
  188. teradataml/scriptmgmt/lls_utils.py +3 -2
  189. teradataml/table_operators/Script.py +2 -2
  190. teradataml/table_operators/TableOperator.py +106 -20
  191. teradataml/table_operators/table_operator_util.py +88 -41
  192. teradataml/table_operators/templates/dataframe_udf.template +63 -0
  193. teradataml/telemetry_utils/__init__.py +0 -0
  194. teradataml/telemetry_utils/queryband.py +52 -0
  195. teradataml/utils/validators.py +1 -1
  196. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/METADATA +115 -2
  197. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/RECORD +200 -140
  198. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/WHEEL +0 -0
  199. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/top_level.txt +0 -0
  200. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/zip-safe +0 -0
@@ -49,7 +49,7 @@ from teradataml.opensource.sklearn.constants import OpenSourcePackage, _OSML_MOD
49
49
  from teradataml.common.messagecodes import MessageCodes
50
50
  from teradataml.common.messages import Messages
51
51
  from teradataml.catalog.byom import save_byom, retrieve_byom, delete_byom
52
- from teradataml.dbutils.dbutils import _create_table
52
+ from teradataml.dbutils.dbutils import _create_table, set_session_param
53
53
  from teradataml.utils.validators import _Validators
54
54
  from teradataml.dataframe.dataframe import DataFrame
55
55
  from teradataml.dataframe.dataframe_utils import DataFrameUtils
@@ -64,6 +64,10 @@ validator = _Validators()
64
64
 
65
65
  installed_model_files = defaultdict(int)
66
66
 
67
+ ## Flag to ensure the sklearn script
68
+ ## installation occurs only once.
69
+ _file_installed = False
70
+
67
71
  class _GenericObjectWrapper:
68
72
  def __init__(self) -> None:
69
73
  self._db_name = _get_current_databasename()
@@ -86,43 +90,24 @@ class _GenericObjectWrapper:
86
90
  if configure.openml_user_env is not None:
87
91
  self._env = configure.openml_user_env
88
92
  else:
89
- self._create_or_get_env()
93
+ self._env = UtilFuncs._create_or_get_env("open_source_ml.json")
90
94
  else:
91
- execute_sql(f"SET SESSION SEARCHUIFDBPATH = {self._db_name};")
92
-
93
- def _create_or_get_env(self):
94
- """
95
- Internal function to return the env if already exists else
96
- creates the environment using template file and return the env.
97
- """
98
- # Get the template file path.
99
- template_dir_path = os.path.join(_TDML_DIRECTORY, "data", "templates",
100
- "open_source_ml.json")
95
+ set_session_param("searchuifdbpath",self._db_name)
101
96
 
102
- # Read template file.
103
- with open(template_dir_path, "r") as r_file:
104
- data = json.load(r_file)
97
+ global _file_installed
98
+ ## Flag to check whether trained model is installed or not.
99
+ self._is_trained_model_installed = False
105
100
 
106
- # Get env_name.
107
- _env_name = data["env_specs"][0]["env_name"]
101
+ ## Install all sklearn script files on Vantage.
102
+ if not _file_installed:
103
+ sklearn_script_files = ["sklearn_fit.py", "sklearn_score.py",
104
+ "sklearn_transform.py", "sklearn_fit_predict.py",
105
+ "sklearn_neighbors.py", "sklearn_model_selection_split.py"]
106
+ for script_file in sklearn_script_files:
107
+ self._install_script_file(file_identifier=script_file.split(".")[0],
108
+ file_name=script_file)
108
109
 
109
- try:
110
- # Call function to 'openml_env' get env.
111
- self._env = get_env(_env_name)
112
- except TeradataMlException as tdml_e:
113
- # We will get here when error says, env does not exist otherwise raise the exception as is.
114
- # Env does not exist so create one.
115
-
116
- exc_msg = "Failed to execute get_env(). User environment '{}' not " \
117
- "found.".format(_env_name)
118
- if exc_msg in tdml_e.args[0]:
119
- print(f"No OpenAF environment with name '{_env_name}' found. Creating one with "\
120
- "latest supported python and required packages.")
121
- _env = create_env(template=template_dir_path)
122
- else:
123
- raise tdml_e
124
- except Exception as exc:
125
- raise exc
110
+ _file_installed = True
126
111
 
127
112
  def _get_columns_as_list(self, cols):
128
113
  """
@@ -205,13 +190,31 @@ class _GenericObjectWrapper:
205
190
  is_binary=is_binary)
206
191
  else:
207
192
  status = self._env.install_file(file_path=new_script,
208
- replace=True,
209
- suppress_output=True)
193
+ replace=True,
194
+ suppress_output=True)
210
195
  if not status:
211
196
  raise TeradataMlException(
212
197
  f"Script file '{file_name}' failed to get installed/replaced in Vantage."
213
198
  )
214
199
 
200
+ def _remove_script_file(self, file_name):
201
+ """
202
+ Internal function to remove script file in Vantage.
203
+ """
204
+ # _env is set during object creation.
205
+ # If not set, it is Vantage Enterprise. Otherwise, it is Vantage Lake.
206
+
207
+ if not self._is_lake_system:
208
+ status = remove_file(file_identifier=file_name.split(".")[0],
209
+ force_remove=True,
210
+ suppress_output=True)
211
+ else:
212
+ status = self._env.remove_file(file_name=file_name,
213
+ suppress_output=True)
214
+ if not status:
215
+ raise TeradataMlException(
216
+ f"Script file '{file_name}' failed to remove in Vantage."
217
+ )
215
218
  def _get_data_col_types_and_partition_col_indices_and_types(self, data, partition_columns,
216
219
  idx_delim=",",
217
220
  types_delim="--"):
@@ -370,6 +373,23 @@ class _OpenSourceObjectWrapper(_GenericObjectWrapper):
370
373
  Internal function to get attributes of all sklearn model objects when multiple models are
371
374
  generated by fit.
372
375
  """
376
+
377
+ def __generate_model_object(model_obj_value):
378
+ """
379
+ Internal function to generate a _SkLearnObjectWrapper model object from model_obj_value.
380
+ """
381
+ # Create _SkLearnObjectWrapper object from opensource model object.
382
+ model_obj = self.__class__(model=first_atrribute_instance)
383
+ model_obj.modelObj = model_obj_value
384
+ model_obj._is_model_installed = True
385
+
386
+ # Setting other model attributes.
387
+ model_obj._is_default_partition_value_fit = self._is_default_partition_value_fit
388
+ model_obj._is_default_partition_value_predict = self._is_default_partition_value_predict
389
+ model_obj._fit_partition_colums_non_default = self._fit_partition_colums_non_default
390
+ model_obj._fit_partition_unique_values = self._fit_partition_unique_values
391
+ return model_obj
392
+
373
393
  # Wrapper function to invoke dynamic method, using arguments
374
394
  # passed by user, on model in each row.
375
395
  def __sklearn_method_invoker_for_multimodel(*c, **kwargs):
@@ -377,36 +397,58 @@ class _OpenSourceObjectWrapper(_GenericObjectWrapper):
377
397
  for i in range(multi_models.shape[0]):
378
398
  curr_model = multi_models.iloc[i]["model"]
379
399
  multi_models.at[i, "model"] = getattr(curr_model, name)(*c, **kwargs)
400
+
401
+ first_function_instance = multi_models.at[0, "model"]
402
+ if self.__class__._validate_model_supportability(first_function_instance):
403
+ return __generate_model_object(multi_models)
404
+
380
405
  return multi_models.rename(columns={"model": name})
381
406
 
382
- # Identify if attribute is callable or not to avoid
383
- # this check in loop for every model.
384
- is_attr_callable = False
385
407
  # Assuming that self.modelObj will have at least 1 row.
386
- is_attr_callable = callable(getattr(self.modelObj.iloc[0]["model"], name))
387
408
 
388
- # If attribute is callable, it should be applied on model in each row
409
+ # Get attribute instance from first model object.
410
+ first_atrribute_instance = getattr(self.modelObj.iloc[0]["model"], name)
411
+
412
+ # If first_atrribute_instance is callable, it should be applied on model in each row
389
413
  # using passed arguments.
390
- if is_attr_callable:
414
+ if callable(first_atrribute_instance):
391
415
  return __sklearn_method_invoker_for_multimodel
392
416
 
393
417
  output_attributes = self.modelObj.copy()
394
418
  for i in range(output_attributes.shape[0]):
395
419
  model = output_attributes.iloc[i]["model"]
396
420
  output_attributes.at[i, "model"] = getattr(model, name)
421
+
422
+ if self.__class__._validate_model_supportability(first_atrribute_instance):
423
+ return __generate_model_object(output_attributes)
424
+
397
425
  return output_attributes.rename(columns={"model": name})
398
426
 
399
427
  def __getattr__(self, name):
400
428
  # This just runs attributes (functions and properties) from the sklearn object.
401
429
  def __sklearn_method_invoker(*c, **kwargs):
402
- return atrribute_instance(*c, **kwargs)
430
+ # sklearn model is returned from the function call. Create _SkLearnObjectWrapper object.
431
+ model_obj = attribute_instance(*c, **kwargs)
432
+ if self.__class__._validate_model_supportability(model_obj):
433
+ model_obj = self.__class__(model=model_obj)
434
+ model_obj._is_model_installed = True # Trained model is returned by function call.
435
+ return model_obj
436
+
403
437
  if isinstance(self.modelObj, pd.DataFrame):
404
438
  return self.__get_obj_attributes_multi_model(name)
405
439
 
406
- atrribute_instance = getattr(self.modelObj, name)
407
- if callable(atrribute_instance):
440
+ attribute_instance = getattr(self.modelObj, name)
441
+
442
+ if callable(attribute_instance):
408
443
  return __sklearn_method_invoker
409
- return atrribute_instance
444
+
445
+ if self.__class__._validate_model_supportability(attribute_instance):
446
+ # sklearn model is returned from the attribute. Create _SkLearnObjectWrapper object.
447
+ model_obj = self.__class__(model=attribute_instance)
448
+ model_obj._is_model_installed = True # Trained model is returned as attribute.
449
+ return model_obj
450
+
451
+ return attribute_instance
410
452
 
411
453
  @classmethod
412
454
  def _validate_model_supportability(cls, model):
@@ -417,15 +459,25 @@ class _OpenSourceObjectWrapper(_GenericObjectWrapper):
417
459
  error_msg = Messages.get_message(MessageCodes.MODEL_CATALOGING_OPERATION_FAILED, "validate",
418
460
  "The given model is not a supported opensource model.")
419
461
  msg_code = MessageCodes.MODEL_CATALOGING_OPERATION_FAILED
462
+ package_name = None
463
+ class_name = None
420
464
  try:
421
465
  # For scikit-learn, model.__module__ is similar to 'sklearn.linear_model._base'.
422
466
  # TODO: check for other supported packages.
423
- if model.__module__.split(".")[0] not in OpenSourcePackage.values():
424
- raise TeradataMlException(error_msg, msg_code)
467
+ if hasattr(model, "__module__"):
468
+ package_name = model.__module__.split(".")[0]
469
+ if package_name not in OpenSourcePackage.values():
470
+ return False
471
+ if hasattr(model, "__class__"):
472
+ class_name = model.__class__.__name__
425
473
  except Exception as ex:
426
474
  # In case accessing model.__module__ fails.
427
475
  raise TeradataMlException(error_msg, msg_code) from ex
428
476
 
477
+ # True only if package name is opensource package name and class name is not internal class.
478
+ return True if package_name and class_name and \
479
+ package_name == cls.OPENSOURCE_PACKAGE_NAME.value and not class_name.startswith("_") else False
480
+
429
481
  def _save_model(self, model_name, replace_if_exists=False):
430
482
  """
431
483
  Internal function to save the model stored in file at location mentioned by class variable
@@ -436,7 +488,8 @@ class _OpenSourceObjectWrapper(_GenericObjectWrapper):
436
488
  conn = get_connection()
437
489
  osml_models_table_exists = conn.dialect.has_table(conn,
438
490
  table_name=_OSML_MODELS_TABLE_NAME,
439
- schema=self._db_name)
491
+ schema=self._db_name,
492
+ table_only=True)
440
493
  if not osml_models_table_exists:
441
494
  all_columns = _OSML_MODELS_TABLE_COLUMNS_TYPE_DICT.copy()
442
495
  all_columns.update(_OSML_ADDITIONAL_COLUMN_TYPES)
@@ -484,7 +537,11 @@ class _OpenSourceObjectWrapper(_GenericObjectWrapper):
484
537
  Internal function to create an instance of the class using the model and deploy
485
538
  the model to Vantage.
486
539
  """
487
- cls._validate_model_supportability(model=model)
540
+ is_model_supportable = cls._validate_model_supportability(model=model)
541
+ if not is_model_supportable:
542
+ raise TeradataMlException(Messages.get_message(MessageCodes.MODEL_CATALOGING_OPERATION_FAILED,
543
+ "deploy", "The given model is not a supported opensource model."),
544
+ MessageCodes.MODEL_CATALOGING_OPERATION_FAILED)
488
545
 
489
546
  cls = cls(model=model)
490
547
  # Load the model file into Vantage node as file can be used in
@@ -830,7 +887,6 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
830
887
  for col in new_partition_columns] + [("model", model_type)]
831
888
 
832
889
  file_name = "sklearn_fit.py"
833
- self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
834
890
 
835
891
  if classes:
836
892
  class_type = type(classes[0]).__name__
@@ -865,6 +921,13 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
865
921
  self._fit_label_columns_types = [data._td_column_names_and_sqlalchemy_types[l_c.lower()]
866
922
  for l_c in label_columns]
867
923
 
924
+ # If the model is trained a second time after the object creation,
925
+ # or if set_params() is called after the first model training,
926
+ # this flag will reset to False. So that for subsequent predict/score
927
+ # operations, the newly trained model will be installed.
928
+ if self._is_trained_model_installed:
929
+ self._is_trained_model_installed = False
930
+
868
931
  def partial_fit(self, X=None, y=None, classes=None, **kwargs):
869
932
  """
870
933
  Please check the description in Docs/OpensourceML/sklearn.py.
@@ -1122,7 +1185,7 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
1122
1185
  # It raises error like "Cannot convert non-finite values (NA or inf) to integer:
1123
1186
  # Error while type casting for column '2'"
1124
1187
  # Hence, using pd.Int64Dtype() for integer columns with nan values.
1125
- types[col] = type_ if type_ != numpy.int64 else pd.Int64Dtype()
1188
+ types[col] = type_ if type_ not in [int, numpy.int64] else pd.Int64Dtype()
1126
1189
 
1127
1190
  # Without this, all columns will be of object type and gets converted to VARCHAR in Vantage.
1128
1191
  opt_pd = opt_pd.astype(types)
@@ -1161,7 +1224,6 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
1161
1224
  partition_columns)
1162
1225
 
1163
1226
  file_name = "sklearn_score.py"
1164
- self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
1165
1227
 
1166
1228
  script_file_path = f"{file_name}" if self._is_lake_system \
1167
1229
  else f"./{self._db_name}/{file_name}"
@@ -1180,7 +1242,11 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
1180
1242
  return_types = [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
1181
1243
  for col in new_partition_columns] + [(func_name, FLOAT())]
1182
1244
 
1183
- self._install_initial_model_file()
1245
+ # Checking the trained model installation. If not installed,
1246
+ # install it and set flag to True.
1247
+ if not self._is_trained_model_installed:
1248
+ self._install_initial_model_file()
1249
+ self._is_trained_model_installed = True
1184
1250
 
1185
1251
  opt = self._run_script(data, script_command, new_partition_columns, return_types)
1186
1252
 
@@ -1229,7 +1295,6 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
1229
1295
  kwargs.pop("label_columns")
1230
1296
 
1231
1297
  file_name = "sklearn_transform.py"
1232
- self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
1233
1298
 
1234
1299
  script_file_path = f"{file_name}" if self._is_lake_system \
1235
1300
  else f"./{self._db_name}/{file_name}"
@@ -1251,14 +1316,18 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
1251
1316
  if func_name in ["predict", "decision_function"] and label_columns:
1252
1317
  return_types += [(col, data._td_column_names_and_sqlalchemy_types[col.lower()])
1253
1318
  for col in label_columns]
1319
+
1254
1320
  return_types += self._get_return_columns_for_function_(data,
1255
1321
  feature_columns,
1256
1322
  label_columns,
1257
1323
  func_name,
1258
1324
  kwargs)
1259
1325
 
1260
- # Installing model files before running sklearn_transform.py.
1261
- self._install_initial_model_file()
1326
+ # Checking the trained model installation. If not installed,
1327
+ # install it and set flag to True.
1328
+ if not self._is_trained_model_installed:
1329
+ self._install_initial_model_file()
1330
+ self._is_trained_model_installed = True
1262
1331
 
1263
1332
  opt = self._run_script(data, script_command, new_partition_columns, return_types)
1264
1333
 
@@ -1302,7 +1371,6 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
1302
1371
  return_types += [(f"{self.class_name.lower()}_{func_name}_1", FLOAT())]
1303
1372
 
1304
1373
  file_name = "sklearn_fit_predict.py"
1305
- self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
1306
1374
 
1307
1375
  data_column_types_str, partition_indices_str, _, new_partition_columns = \
1308
1376
  self._get_data_col_types_and_partition_col_indices_and_types(data, new_partition_columns)
@@ -1317,7 +1385,11 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
1317
1385
  # Get unique values in partitioning columns.
1318
1386
  self._fit_partition_unique_values = data.drop_duplicate(new_partition_columns).get_values()
1319
1387
 
1320
- self._install_initial_model_file()
1388
+ # Checking the trained model installation. If not installed,
1389
+ # install it and set flag to True.
1390
+ if not self._is_trained_model_installed:
1391
+ self._install_initial_model_file()
1392
+ self._is_trained_model_installed = True
1321
1393
 
1322
1394
  opt = self._run_script(data, script_command, new_partition_columns, return_types)
1323
1395
 
@@ -1395,7 +1467,6 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
1395
1467
  args_str = self._get_kwargs_str(kwargs)
1396
1468
 
1397
1469
  file_name = "sklearn_neighbors.py"
1398
- self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
1399
1470
 
1400
1471
  script_file_path = f"{file_name}" if self._is_lake_system \
1401
1472
  else f"./{self._db_name}/{file_name}"
@@ -1429,7 +1500,11 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
1429
1500
  # Get unique values in partitioning columns.
1430
1501
  self._fit_partition_unique_values = data.drop_duplicate(new_partition_columns).get_values()
1431
1502
 
1432
- self._install_initial_model_file()
1503
+ # Checking the trained model installation. If not installed,
1504
+ # install it and set flag to True.
1505
+ if not self._is_trained_model_installed:
1506
+ self._install_initial_model_file()
1507
+ self._is_trained_model_installed = True
1433
1508
 
1434
1509
  opt = self._run_script(data, script_command, new_partition_columns, return_types)
1435
1510
 
@@ -1513,7 +1588,6 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
1513
1588
  group_columns)
1514
1589
 
1515
1590
  file_name = "sklearn_model_selection_split.py"
1516
- self._install_script_file(file_identifier=file_name.split(".")[0], file_name=file_name)
1517
1591
 
1518
1592
  script_file_path = f"{file_name}" if self._is_lake_system \
1519
1593
  else f"./{self._db_name}/{file_name}"
@@ -1548,7 +1622,11 @@ class _SkLearnObjectWrapper(_OpenSourceObjectWrapper):
1548
1622
  # Get unique values in partitioning columns.
1549
1623
  self._fit_partition_unique_values = data.drop_duplicate(new_partition_columns).get_values()
1550
1624
 
1551
- self._install_initial_model_file()
1625
+ # Checking the trained model installation. If not installed,
1626
+ # install it and set flag to True.
1627
+ if not self._is_trained_model_installed:
1628
+ self._install_initial_model_file()
1629
+ self._is_trained_model_installed = True
1552
1630
 
1553
1631
  opt = self._run_script(data, script_command, new_partition_columns, return_types)
1554
1632
 
@@ -1633,11 +1711,19 @@ class _SKLearnFunctionWrapper(_GenericObjectWrapper):
1633
1711
 
1634
1712
  script_file_path = f"{self._model_file_name}" if self._is_lake_system \
1635
1713
  else f"./{self._db_name}/{self._model_file_name}"
1714
+
1715
+ model_file_prefix = None
1716
+ if self._is_lake_system:
1717
+ model_file_prefix = self._model_file_name.replace(".py", "")
1718
+
1636
1719
  py_exc = UtilFuncs._get_python_execution_path()
1637
- script_command = f"{py_exc} {script_file_path} {partition_indices_str} {data_column_types_str} {data_args_str}"
1720
+ script_command = (f"{py_exc} {script_file_path} {partition_indices_str} "\
1721
+ f"{data_column_types_str} {data_args_str} {self._is_lake_system}"\
1722
+ f" {model_file_prefix}")
1638
1723
 
1639
- return_types = [(col, self.__tdml_df._td_column_names_and_sqlalchemy_types[col.lower()])
1640
- for col in partition_cols] + [(self.__func_name, CLOB())]
1724
+ model_type = BLOB() if self._is_lake_system else CLOB()
1725
+ return_types = [(col, self.__tdml_df._td_column_names_and_sqlalchemy_types[col.lower()])
1726
+ for col in partition_cols] + [(self.__func_name, model_type)]
1641
1727
 
1642
1728
  # Generate new file in .teradataml directory and install it to Vantage.
1643
1729
  self._prepare_and_install_file()
@@ -1652,8 +1738,7 @@ class _SKLearnFunctionWrapper(_GenericObjectWrapper):
1652
1738
 
1653
1739
  # File cleanup after processing.
1654
1740
  os.remove(self._model_file_local)
1655
- remove_file(file_identifier=self._model_file_name.split(".")[0], suppress_output=True,
1656
- force_remove=True)
1741
+ self._remove_script_file(self._model_file_name)
1657
1742
 
1658
1743
  return self.modelObj
1659
1744
 
@@ -1697,7 +1782,7 @@ class _SKLearnFunctionWrapper(_GenericObjectWrapper):
1697
1782
 
1698
1783
  def _prepare_and_install_file(self):
1699
1784
  """
1700
- Prepare function script file from template file and install it in Vaantage.
1785
+ Prepare function script file from template file and install it in Vantage.
1701
1786
  """
1702
1787
  with open(os.path.join(self._scripts_path, "sklearn_function.template")) as fp:
1703
1788
  script_data = fp.read()
@@ -1,11 +1,13 @@
1
+ from teradataml.common.deprecations import argument_deprecation
1
2
  from teradataml.common.exceptions import TeradataMlException
2
3
  from teradataml.common.messagecodes import MessageCodes
3
4
  from teradataml.common.messages import Messages
4
5
  from teradataml.options.configure import configure
5
6
  from teradataml.utils.internal_buffer import _InternalBuffer
6
- from teradatasqlalchemy.telemetry.queryband import collect_queryband
7
+ from teradataml.telemetry_utils.queryband import collect_queryband
7
8
 
8
9
 
10
+ @argument_deprecation("future", ["auth_token", "ues_url"], False, None)
9
11
  @collect_queryband(queryband="StCnfgPrms")
10
12
  def set_config_params(**kwargs):
11
13
  """
@@ -58,6 +58,7 @@ class _Configure(_ConfigureSuper):
58
58
  indb_install_location = _create_property('indb_install_location')
59
59
  openml_user_env = _create_property('openml_user_env')
60
60
  local_storage = _create_property('local_storage')
61
+ stored_procedure_install_location = _create_property('stored_procedure_install_location')
61
62
 
62
63
  def __init__(self, default_varchar_size=1024, column_casesensitive_handler = False,
63
64
  vantage_version="vantage1.1", val_install_location=None,
@@ -66,7 +67,7 @@ class _Configure(_ConfigureSuper):
66
67
  read_nos_function_mapping="read_nos", write_nos_function_mapping="write_nos",
67
68
  cran_repositories=None, inline_plot=True,
68
69
  indb_install_location="/var/opt/teradata/languages/sles12sp3/Python/",
69
- openml_user_env=None, local_storage=None):
70
+ openml_user_env=None, local_storage=None, stored_procedure_install_location="SYSLIB"):
70
71
 
71
72
  """
72
73
  PARAMETERS:
@@ -170,6 +171,16 @@ class _Configure(_ConfigureSuper):
170
171
  Example:
171
172
  # Set the garbage collector location to "/Users/gc/"
172
173
  teradataml.options.configure.local_storage = "/Users/gc/"
174
+
175
+ stored_procedure_install_location:
176
+ Specifies the name of the database where stored procedures
177
+ are installed.
178
+ Types: string
179
+ Example:
180
+ # Set the Stored Procedure install location to 'SYSLIB'
181
+ # when stored procedures are installed in 'SYSLIB'.
182
+ teradataml.options.configure.stored_procedure_install_location = "SYSLIB"
183
+
173
184
  """
174
185
  super().__init__()
175
186
  super().__setattr__('default_varchar_size', default_varchar_size)
@@ -187,6 +198,7 @@ class _Configure(_ConfigureSuper):
187
198
  super().__setattr__('indb_install_location', indb_install_location)
188
199
  super().__setattr__('openml_user_env', openml_user_env)
189
200
  super().__setattr__('local_storage', local_storage)
201
+ super().__setattr__('stored_procedure_install_location', stored_procedure_install_location)
190
202
 
191
203
  # internal configurations
192
204
  # These configurations are internal and should not be
@@ -301,7 +313,7 @@ class _Configure(_ConfigureSuper):
301
313
  'read_nos_function_mapping', 'write_nos_function_mapping',
302
314
  '_byom_model_catalog_database', '_byom_model_catalog_table',
303
315
  '_byom_model_catalog_license', '_byom_model_catalog_license_source',
304
- 'indb_install_location', 'local_storage']:
316
+ 'indb_install_location', 'local_storage', 'stored_procedure_install_location']:
305
317
  if not isinstance(value, str):
306
318
  raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE, name,
307
319
  'str'),
@@ -58,7 +58,7 @@ class _Display(_DisplaySuper):
58
58
  byte_encoding = 'base16',
59
59
  print_sqlmr_query = False,
60
60
  blob_length=10,
61
- suppress_vantage_runtime_warnings=False,
61
+ suppress_vantage_runtime_warnings=True,
62
62
  geometry_column_length=30):
63
63
  """
64
64
  PARAMETERS:
@@ -111,7 +111,7 @@ class _Display(_DisplaySuper):
111
111
  Specifies whether to display the warnings raised by Vantage or not.
112
112
  When set to True, warnings raised by Vantage are not displayed.
113
113
  Otherwise, warnings are displayed.
114
- Default Value: False
114
+ Default Value: True
115
115
  Types: bool
116
116
  Example:
117
117
  display.suppress_vantage_runtime_warnings = True
teradataml/plot/axis.py CHANGED
@@ -951,7 +951,7 @@ class Axis:
951
951
  # Execute the node and create the table in Vantage.
952
952
  if self.__y_axis_data[0]._parent_df._table_name is None:
953
953
  # Assuming all the columns are from same DataFrame.
954
- repr(self.__y_axis_data[0]._parent_df)
954
+ self.__y_axis_data[0]._parent_df.materialize()
955
955
 
956
956
  series = TDSeries(data=_df if self.ignore_nulls else self.__x_axis_data[0]._parent_df,
957
957
  id=self.__series_identifier,
@@ -994,7 +994,7 @@ class Axis:
994
994
  # Remove null values from DataFrame
995
995
  if self.ignore_nulls:
996
996
  _df = _df.dropna()
997
- repr(_df)
997
+ _df.materialize()
998
998
  series = TDSeries(data=_df,
999
999
  id="id",
1000
1000
  row_index="x",
@@ -1049,7 +1049,7 @@ class Axis:
1049
1049
 
1050
1050
  # Execute the node and create the table/view in Vantage.
1051
1051
  if self.__y_axis_data[0]._parent_df._table_name is None:
1052
- repr(self.__y_axis_data[0]._parent_df)
1052
+ self.__y_axis_data[0]._parent_df.materialize()
1053
1053
 
1054
1054
  matrix = TDMatrix(data=_df if self.ignore_nulls else self.__x_axis_data[0]._parent_df,
1055
1055
  id=self.__series_identifier,
@@ -1077,7 +1077,7 @@ class Axis:
1077
1077
  # Remove null values from DataFrame
1078
1078
  if self.ignore_nulls:
1079
1079
  _df = _df.dropna()
1080
- repr(_df)
1080
+ _df.materialize()
1081
1081
  matrix = TDMatrix(data=_df,
1082
1082
  id="id",
1083
1083
  row_index="x",
@@ -32,7 +32,7 @@ from teradataml.common.messagecodes import MessageCodes
32
32
  from teradataml.common.utils import UtilFuncs
33
33
  from teradataml.utils.validators import _Validators
34
34
  from urllib.parse import urlparse
35
- from teradatasqlalchemy.telemetry.queryband import collect_queryband
35
+ from teradataml.telemetry_utils.queryband import collect_queryband
36
36
 
37
37
 
38
38
  def _get_ues_url(env_type="users", **kwargs):
@@ -535,6 +535,8 @@ class UserEnv:
535
535
 
536
536
  # Check if file exists or not.
537
537
  _Validators._validate_file_exists(file_path)
538
+ # Check if file is empty or not.
539
+ _Validators._check_empty_file(file_path)
538
540
 
539
541
  try:
540
542
  # If file size is more than 10 MB, upload the file to cloud and export it to UES.
@@ -681,11 +683,11 @@ class UserEnv:
681
683
  "x-ms-blob-type": CloudProvider.X_MS_BLOB_TYPE.value}
682
684
 
683
685
  # Initiate file upload to cloud.
684
- response = UtilFuncs._http_request(cloud_storage_url,
685
- HTTPRequest.PUT,
686
- data=UtilFuncs._get_file_contents(file_path,
687
- read_in_binary_mode=True),
688
- headers=headers)
686
+ with open(file_path, 'rb') as fp:
687
+ response = UtilFuncs._http_request(cloud_storage_url,
688
+ HTTPRequest.PUT,
689
+ data=fp,
690
+ headers=headers)
689
691
 
690
692
  # Since the API is not for UES, it is better to validate and raise error separately.
691
693
  if not (200 <= response.status_code < 300):
@@ -3521,6 +3523,8 @@ class UserEnv:
3521
3523
  user environment created in Vantage Languages Ecosystem. If
3522
3524
  model with same name already exists in the remote user
3523
3525
  environment, error is thrown.
3526
+ Note:
3527
+ The size of the model must not exceed 5 GB.
3524
3528
 
3525
3529
  PARAMETERS:
3526
3530
  model_path:
@@ -38,7 +38,7 @@ import warnings
38
38
  import webbrowser
39
39
  from urllib.parse import parse_qs, urlparse
40
40
  from teradataml.utils.utils import _async_run_id_info
41
- from teradatasqlalchemy.telemetry.queryband import collect_queryband
41
+ from teradataml.telemetry_utils.queryband import collect_queryband
42
42
 
43
43
 
44
44
  @collect_queryband(queryband="LstBsEnv")
@@ -1121,7 +1121,8 @@ def get_env(env_name):
1121
1121
  # Get environments created by the current logged in user.
1122
1122
  user_envs_df = list_user_envs()
1123
1123
 
1124
- if env_name not in user_envs_df.env_name.values:
1124
+ if (user_envs_df is None or
1125
+ (not user_envs_df.empty and env_name not in user_envs_df.env_name.values)):
1125
1126
  msg_code = MessageCodes.FUNC_EXECUTION_FAILED
1126
1127
  error_msg = Messages.get_message(msg_code, "get_env()", "User environment '{}' not found."
1127
1128
  " Use 'create_env()' function to create"
@@ -431,8 +431,8 @@ class Script(TableOperator):
431
431
  from teradataml import list_td_reserved_keywords
432
432
  if get_connection():
433
433
  # Checking for reserved keywords and raising error if present.
434
- for column_name in self.returns:
435
- list_td_reserved_keywords(key=column_name, raise_error=True)
434
+ columns = self.returns
435
+ list_td_reserved_keywords(key=columns, raise_error=True)
436
436
 
437
437
  def __validate(self):
438
438
  """