teradataml 20.0.0.3__py3-none-any.whl → 20.0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic. Click here for more details.

Files changed (151)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +193 -1
  3. teradataml/__init__.py +2 -1
  4. teradataml/_version.py +2 -2
  5. teradataml/analytics/analytic_function_executor.py +25 -18
  6. teradataml/analytics/byom/__init__.py +1 -1
  7. teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
  8. teradataml/analytics/sqle/__init__.py +20 -2
  9. teradataml/analytics/utils.py +15 -1
  10. teradataml/analytics/valib.py +18 -4
  11. teradataml/automl/__init__.py +341 -112
  12. teradataml/automl/autodataprep/__init__.py +471 -0
  13. teradataml/automl/data_preparation.py +84 -42
  14. teradataml/automl/data_transformation.py +69 -33
  15. teradataml/automl/feature_engineering.py +76 -9
  16. teradataml/automl/feature_exploration.py +639 -25
  17. teradataml/automl/model_training.py +35 -14
  18. teradataml/clients/auth_client.py +2 -2
  19. teradataml/common/__init__.py +1 -2
  20. teradataml/common/constants.py +122 -63
  21. teradataml/common/messagecodes.py +14 -3
  22. teradataml/common/messages.py +8 -4
  23. teradataml/common/sqlbundle.py +40 -10
  24. teradataml/common/utils.py +366 -74
  25. teradataml/common/warnings.py +11 -0
  26. teradataml/context/context.py +348 -86
  27. teradataml/data/amazon_reviews_25.csv +26 -0
  28. teradataml/data/apriori_example.json +22 -0
  29. teradataml/data/byom_example.json +11 -0
  30. teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
  31. teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
  32. teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
  33. teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
  34. teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
  35. teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
  36. teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
  37. teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
  38. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
  39. teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
  40. teradataml/data/docs/sqle/docs_17_20/Shap.py +28 -6
  41. teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
  42. teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
  43. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
  44. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
  45. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
  46. teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
  47. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
  48. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
  49. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
  50. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
  51. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
  52. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
  53. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
  54. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
  55. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  56. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
  57. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
  58. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
  59. teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
  60. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  61. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
  62. teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
  63. teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
  64. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  65. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
  66. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
  67. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
  68. teradataml/data/hnsw_alter_data.csv +5 -0
  69. teradataml/data/hnsw_data.csv +10 -0
  70. teradataml/data/jsons/byom/h2opredict.json +1 -1
  71. teradataml/data/jsons/byom/onnxembeddings.json +266 -0
  72. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
  73. teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
  74. teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
  75. teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
  76. teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
  77. teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
  78. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
  79. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
  80. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
  81. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
  82. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
  83. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
  84. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
  85. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
  86. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
  87. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
  88. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
  89. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
  90. teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
  91. teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
  92. teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
  93. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +2 -2
  94. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +3 -3
  95. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +6 -6
  96. teradataml/data/ner_dict.csv +8 -0
  97. teradataml/data/ner_input_eng.csv +7 -0
  98. teradataml/data/ner_rule.csv +5 -0
  99. teradataml/data/pos_input.csv +40 -0
  100. teradataml/data/tdnerextractor_example.json +14 -0
  101. teradataml/data/teradataml_example.json +21 -0
  102. teradataml/data/textmorph_example.json +5 -0
  103. teradataml/data/to_num_data.csv +4 -0
  104. teradataml/data/tochar_data.csv +5 -0
  105. teradataml/data/trans_dense.csv +16 -0
  106. teradataml/data/trans_sparse.csv +55 -0
  107. teradataml/data/vectordistance_example.json +1 -1
  108. teradataml/dataframe/copy_to.py +45 -29
  109. teradataml/dataframe/data_transfer.py +72 -46
  110. teradataml/dataframe/dataframe.py +642 -166
  111. teradataml/dataframe/dataframe_utils.py +167 -22
  112. teradataml/dataframe/functions.py +135 -20
  113. teradataml/dataframe/setop.py +11 -6
  114. teradataml/dataframe/sql.py +330 -78
  115. teradataml/dbutils/dbutils.py +556 -140
  116. teradataml/dbutils/filemgr.py +14 -10
  117. teradataml/hyperparameter_tuner/optimizer.py +12 -1
  118. teradataml/lib/aed_0_1.dll +0 -0
  119. teradataml/opensource/{sklearn/_sklearn_wrapper.py → _base.py} +168 -1013
  120. teradataml/opensource/_class.py +141 -17
  121. teradataml/opensource/{constants.py → _constants.py} +7 -3
  122. teradataml/opensource/_lightgbm.py +52 -53
  123. teradataml/opensource/_sklearn.py +1008 -0
  124. teradataml/opensource/_wrapper_utils.py +5 -5
  125. teradataml/options/__init__.py +47 -15
  126. teradataml/options/configure.py +103 -26
  127. teradataml/options/display.py +13 -2
  128. teradataml/plot/axis.py +47 -8
  129. teradataml/plot/figure.py +33 -0
  130. teradataml/plot/plot.py +63 -13
  131. teradataml/scriptmgmt/UserEnv.py +307 -40
  132. teradataml/scriptmgmt/lls_utils.py +428 -145
  133. teradataml/store/__init__.py +2 -3
  134. teradataml/store/feature_store/feature_store.py +102 -7
  135. teradataml/table_operators/Apply.py +48 -19
  136. teradataml/table_operators/Script.py +23 -2
  137. teradataml/table_operators/TableOperator.py +3 -1
  138. teradataml/table_operators/table_operator_util.py +58 -9
  139. teradataml/utils/dtypes.py +49 -1
  140. teradataml/utils/internal_buffer.py +38 -0
  141. teradataml/utils/validators.py +377 -62
  142. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/METADATA +200 -4
  143. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/RECORD +146 -112
  144. teradataml/data/SQL_Fundamentals.pdf +0 -0
  145. teradataml/libaed_0_1.dylib +0 -0
  146. teradataml/libaed_0_1.so +0 -0
  147. teradataml/opensource/sklearn/__init__.py +0 -0
  148. teradataml/store/vector_store/__init__.py +0 -1586
  149. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/WHEEL +0 -0
  150. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/top_level.txt +0 -0
  151. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/zip-safe +0 -0
@@ -15,10 +15,14 @@
15
15
  # ##################################################################
16
16
 
17
17
  from importlib import import_module
18
- from teradataml.opensource.sklearn._sklearn_wrapper import _SkLearnObjectWrapper, _SKLearnFunctionWrapper
19
- from teradataml.opensource._lightgbm import _LightgbmDatasetWrapper, \
20
- _LightgbmFunctionWrapper, _LightgbmBoosterWrapper, _LighgbmSklearnWrapper
21
- from teradataml.opensource.constants import _SKL_MODULES, _LIGHTGBM_MODULES
18
+
19
+ from teradataml.opensource._constants import _LIGHTGBM_MODULES, _SKL_MODULES
20
+ from teradataml.opensource._lightgbm import (_LightgbmSklearnWrapper,
21
+ _LightgbmBoosterWrapper,
22
+ _LightgbmDatasetWrapper,
23
+ _LightgbmFunctionWrapper)
24
+ from teradataml.opensource._sklearn import (_SKLearnFunctionWrapper,
25
+ _SkLearnObjectWrapper)
22
26
 
23
27
 
24
28
  class _OpenSource:
@@ -82,6 +86,7 @@ class _OpenSource:
82
86
  Specifies the teradataml supported opensource model object that is to be deployed.
83
87
  Currently supported models are:
84
88
  - sklearn
89
+ - lightgbm
85
90
  Types: object
86
91
 
87
92
  replace_if_exists:
@@ -100,6 +105,9 @@ class _OpenSource:
100
105
  "replace_if_exists" is set to False.
101
106
 
102
107
  EXAMPLES:
108
+ ## sklearn examples.
109
+
110
+ # Import required packages and create LinearRegression sklearn object.
103
111
  >>> from teradataml import td_sklearn
104
112
  >>> from sklearn.linear_model import LinearRegression
105
113
  >>> model = LinearRegression(normalize=True)
@@ -118,6 +126,53 @@ class _OpenSource:
118
126
  >>> lin_reg
119
127
  LinearRegression(normalize=True)
120
128
 
129
+ ## lightgbm examples.
130
+
131
+ # Import required packages and create LGBMClassifier lightGBM object.
132
+ >>> from teradataml import td_lightgbm
133
+ >>> import lightgbm as lgb
134
+ >>> model = lgb.LGBMClassifier()
135
+
136
+ # Example 1: Deploy the LightGBM model to Vantage.
137
+ >>> lgb_model = td_lightgbm.deploy("lgb_model_ver_1", model)
138
+ Model is saved.
139
+ >>> lgb_model
140
+ LGBMClassifier()
141
+
142
+ # Example 2: Deploy the LightGBM model to Vantage with the same name as a model that
143
+ # already exists in Vantage.
144
+ >>> lgb_model = td_lightgbm.deploy("lgb_model_ver_1", model, replace_if_exists=True)
145
+ Model is deleted.
146
+ Model is saved.
147
+ >>> lgb_model
148
+ LGBMClassifier()
149
+
150
+ # Example 3: Deploy LightGBM model trained locally using train() function to Vantage.
151
+ # Create Dataset object locally, assuming pdf_x and pdf_y are the feature and label pandas
152
+ # DataFrames.
153
+ >>> lgbm_data = lgb.Dataset(data=pdf_x, label=pdf_y, free_raw_data=False)
154
+ >>> lgbm_data
155
+ <lightgbm.basic.Dataset object at ....>
156
+
157
+ # Train the model using train() function.
158
+ >>> model = lgb.train(params={}, train_set=lgbm_data, num_boost_round=30, valid_sets=[lgbm_data])
159
+ [LightGBM] [Warning] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000043 seconds.
160
+ You can set `force_row_wise=true` to remove the overhead.
161
+ And if memory is not enough, you can set `force_col_wise=true`.
162
+ [LightGBM] [Info] Total Bins 532
163
+ [LightGBM] [Info] Number of data points in the train set: 400, number of used features: 4
164
+ [1] valid_0's l2: 0.215811
165
+ [2] valid_0's l2: 0.188138
166
+ [3] valid_0's l2: 0.166146
167
+ ...
168
+ ...
169
+ [29] valid_0's l2: 0.042255
170
+ [30] valid_0's l2: 0.0416953
171
+
172
+ # Deploy the model to Vantage.
173
+ >>> lgb_model = td_lightgbm.deploy("lgb_model_ver_2", model)
174
+ >>> lgb_model
175
+ <lightgbm.basic.Booster object at ...>
121
176
 
122
177
  """
123
178
  return self._object_wrapper._deploy(model_name=model_name,
@@ -145,6 +200,7 @@ class _OpenSource:
145
200
  TeradataMlException if model with name "model_name" does not exist.
146
201
 
147
202
  EXAMPLE:
203
+ # sklearn example.
148
204
  >>> from teradataml import td_sklearn
149
205
  >>> # Load the model saved in Vantage. Note that the model is saved using
150
206
  >>> # `deploy()` of exposed interface object (like `td_sklearn`) or
@@ -152,6 +208,15 @@ class _OpenSource:
152
208
  >>> model = td_sklearn.load("linreg_model_ver_1")
153
209
  >>> model
154
210
  LinearRegression(normalize=True)
211
+
212
+ # lightgbm example.
213
+ >>> from teradataml import td_lightgbm
214
+ >>> # Load the model saved in Vantage. Note that the model is saved using
215
+ >>> # `deploy()` of exposed interface object (like `td_lightgbm`) or
216
+ >>> # `_OpenSourceObjectWrapper` Object.
217
+ >>> model = td_lightgbm.load("lgb_model_ver_1")
218
+ >>> model
219
+ LGBMClassifier()
155
220
  """
156
221
  return self._object_wrapper._load(model_name)
157
222
 
@@ -290,18 +355,73 @@ class Lightgbm(_OpenSource):
290
355
  >>> df_x = df.select(df.columns[:-1])
291
356
  >>> df_y = df.select(df.columns[-1])
292
357
 
358
+ >>> from teradataml import td_lightgbm
359
+
360
+ # Example 1: Train the model using train() function.
293
361
  # Create lightgbm Dataset object.
294
362
  >>> lgbm_data = td_lightgbm.Dataset(data=df_x, label=df_y, free_raw_data=False)
295
363
  >>> lgbm_data
296
- <lightgbm.basic.Dataset object at 0x7f33f0656820>
364
+ <lightgbm.basic.Dataset object at ...>
365
+
366
+ # Train the model.
367
+ >>> model = td_lightgbm.train(params={}, train_set=lgbm_data, num_boost_round=30, valid_sets=[lgbm_data])
368
+ [LightGBM] [Warning] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000043 seconds.
369
+ You can set `force_row_wise=true` to remove the overhead.
370
+ And if memory is not enough, you can set `force_col_wise=true`.
371
+ [LightGBM] [Info] Total Bins 532
372
+ [LightGBM] [Info] Number of data points in the train set: 400, number of used features: 4
373
+ [1] valid_0's l2: 0.215811
374
+ [2] valid_0's l2: 0.188138
375
+ [3] valid_0's l2: 0.166146
376
+ ...
377
+ ...
378
+ [29] valid_0's l2: 0.042255
379
+ [30] valid_0's l2: 0.0416953
380
+ >>> model
381
+ <lightgbm.basic.Booster object at ...>
382
+
383
+ # Example 2: Train the model using LGBMClassifier sklearn object.
384
+ # Create lightgbm sklearn object.
385
+ >>> lgbm_cl = td_lightgbm.LGBMClassifier()
386
+ >>> lgbm_cl
387
+ LGBMClassifier()
388
+
389
+ # Fit/train the model using fit() function.
390
+ >>> lgbm_cl.fit(df_x, df_y)
391
+ LGBMClassifier()
392
+
393
+ # Perform prediction.
394
+ >>> lgbm_cl.predict(df_x).head(3)
395
+ col1 col2 col3 col4 lgbmclassifier_predict_1
396
+ 0 1.105026 -1.949894 -1.537164 0.073171 1
397
+ 1 1.878349 0.577289 1.795746 2.762539 1
398
+ 2 -1.130582 -0.020296 -0.710234 -1.440991 0
399
+
400
+ # Access attributes.
401
+ >>> lgbm_cl.feature_importances_
402
+ array([ 0, 20, 10, 10])
297
403
  """
298
404
 
299
405
  def __init__(self):
300
406
  super().__init__()
301
407
  self._modules = _LIGHTGBM_MODULES
302
- self._object_wrapper = _LightgbmDatasetWrapper
408
+ self._object_wrapper = _LightgbmBoosterWrapper
303
409
  self._function_wrapper = _LightgbmFunctionWrapper
304
410
 
411
+ def _assign_object_wrapper(self, module, class_name):
412
+ """
413
+ Assigns the appropriate object wrapper based on the module and class name.
414
+ """
415
+
416
+ if module == "lightgbm.basic" and class_name == "Booster":
417
+ self._object_wrapper = _LightgbmBoosterWrapper
418
+
419
+ if module == "lightgbm.basic" and class_name == "Dataset":
420
+ self._object_wrapper = _LightgbmDatasetWrapper
421
+
422
+ if module == "lightgbm.sklearn":
423
+ self._object_wrapper = _LightgbmSklearnWrapper
424
+
305
425
  def __getattr__(self, name):
306
426
 
307
427
  def __get_module(*c, **kwargs):
@@ -317,23 +437,27 @@ class Lightgbm(_OpenSource):
317
437
  return self._function_wrapper(module_name=module, func_name=name)(**kwargs)
318
438
 
319
439
  kwargs.update(zip(class_instance.__init__.__code__.co_varnames[1:], c))
320
- if module == "lightgbm.basic" and name == "Booster":
321
- return _LightgbmBoosterWrapper(module_name=module, class_name=name, kwargs=kwargs)
322
-
323
- if module == "lightgbm.sklearn":
324
- return _LighgbmSklearnWrapper(module_name=module, class_name=name, kwargs=kwargs)
325
440
 
326
- return self._object_wrapper(module_name=module, class_name=name, kwargs=kwargs)
441
+ all_args = {"module_name": module, "class_name": name, "kwargs": kwargs}
442
+ self._assign_object_wrapper(module, name)
443
+
444
+ return self._object_wrapper(**all_args)
327
445
 
328
446
  return __get_module
329
447
 
330
448
  def deploy(self, model_name, model, replace_if_exists=False):
331
- raise NotImplementedError("The deploy() function is not yet supported for td_lightgbm. \
332
- Support will be added in future releases.")
449
+ # Docstring of parent class also contains examples of lightgbm.
450
+ module = model.__module__ if hasattr(model, "__module__") else None
451
+ class_name = model.__class__.__name__ if hasattr(model, "__class__") else None
333
452
 
334
- def load(self, model_name):
335
- raise NotImplementedError("The load() function is not yet supported for td_lightgbm. \
336
- Support will be added in future releases.")
453
+ if module is None or class_name is None:
454
+ raise ValueError("The model object is not supported for deployment.")
455
+
456
+ self._assign_object_wrapper(module, class_name)
457
+
458
+ return self._object_wrapper._deploy(model_name=model_name,
459
+ model=model,
460
+ replace_if_exists=replace_if_exists)
337
461
 
338
462
 
339
463
  td_sklearn = Sklearn()
@@ -13,11 +13,11 @@
13
13
  #
14
14
  # ##################################################################
15
15
 
16
- from enum import Enum
17
- from teradataml import VARCHAR, BLOB
18
16
  from dataclasses import dataclass, field
19
- from typing import Any, Dict, Tuple, Optional
17
+ from enum import Enum
18
+ from typing import Any, Dict, Optional, Tuple
20
19
 
20
+ from teradataml import BLOB, VARCHAR
21
21
 
22
22
  _SKL_MODULES = ["sklearn.calibration", "sklearn.cluster", "sklearn.compose", "sklearn.covariance",
23
23
  "sklearn.decomposition", "sklearn.discriminant_analysis",
@@ -40,6 +40,8 @@ class OpenSourcePackage(Enum):
40
40
  return [item.value for item in cls]
41
41
 
42
42
 
43
+ _packages_verified_in_vantage = {} # Used to ensure the check for Python and Python packages is done only once per package.
44
+
43
45
  @dataclass
44
46
  class OpensourceModels:
45
47
  """Dataclass for Opensource Models details."""
@@ -49,6 +51,8 @@ class OpensourceModels:
49
51
  pos_args: Tuple[Any] = tuple() # Positional arguments used for model creation.
50
52
  key_args: Dict[str, Any] = field(default_factory=dict) # Keyword arguments used for model creation.
51
53
  fit_partition_columns_non_default: Optional[str] = None # Columns used for partitioning.
54
+ osml_module: Optional[str] = None # Module of corresponding wrapper class.
55
+ osml_class: Optional[str] = None # Corresponding wrapper class name.
52
56
 
53
57
  # Model table details used by opensource BYOM.
54
58
  _OSML_MODELS_TABLE_NAME = "opensourceml_models"
@@ -19,19 +19,20 @@ import json
19
19
  import os
20
20
  import pickle
21
21
  import warnings
22
-
23
22
  from collections import OrderedDict
24
23
  from importlib import import_module
25
24
 
26
-
25
+ import numpy
27
26
  import pandas as pd
28
27
  from teradatasqlalchemy import BLOB, CLOB, FLOAT
29
28
 
30
- from teradataml import _TDML_DIRECTORY, UtilFuncs, execute_sql, TeradataMlException, Messages, MessageCodes, DataFrame
29
+ from teradataml import (_TDML_DIRECTORY, MessageCodes, Messages,
30
+ TeradataMlException, UtilFuncs, execute_sql)
31
+ from teradataml.opensource._base import (_FunctionWrapper,
32
+ _OpenSourceObjectWrapper)
33
+ from teradataml.opensource._constants import OpenSourcePackage
34
+ from teradataml.opensource._sklearn import _SkLearnObjectWrapper
31
35
  from teradataml.opensource._wrapper_utils import _generate_new_name
32
- from teradataml.opensource.constants import OpenSourcePackage
33
- from teradataml.opensource.sklearn._sklearn_wrapper import (
34
- _FunctionWrapper, _OpenSourceObjectWrapper, _SkLearnObjectWrapper)
35
36
 
36
37
 
37
38
  class _LightgbmDatasetWrapper(_OpenSourceObjectWrapper):
@@ -43,6 +44,7 @@ class _LightgbmDatasetWrapper(_OpenSourceObjectWrapper):
43
44
 
44
45
  file_type = "file_fn_lightgbm"
45
46
  self._template_file = "dataset.template"
47
+ self._pkgs = ["lightgbm", "scikit-learn", "numpy", "scipy"]
46
48
  super().__init__(model=model, module_name=module_name, class_name=class_name, kwargs=kwargs)
47
49
 
48
50
  self._scripts_path = os.path.join(_TDML_DIRECTORY, "data", "scripts", "lightgbm")
@@ -221,12 +223,16 @@ class _LightgbmDatasetWrapper(_OpenSourceObjectWrapper):
221
223
 
222
224
  return self
223
225
 
226
+ def deploy(self, model_name, replace_if_exists=False):
227
+ raise ValueError("lightgbm Dataset object is not the model object that can be trained. "
228
+ "Hence, not deployable.")
224
229
 
225
230
  class _LightgbmFunctionWrapper(_FunctionWrapper):
226
231
  OPENSOURCE_PACKAGE_NAME = OpenSourcePackage.LIGHTGBM
227
232
  def __init__(self, module_name=None, func_name=None):
228
233
  file_type = "file_fn_lightgbm"
229
234
  template_file = "lightgbm_function.template"
235
+ self._pkgs = ["lightgbm", "scikit-learn", "numpy", "scipy"]
230
236
  self._script_file_name = _generate_new_name(type=file_type, extension="py")
231
237
  super().__init__(module_name, func_name, file_type=file_type, template_file=template_file)
232
238
  self._scripts_path = os.path.join(_TDML_DIRECTORY, "data", "scripts", "lightgbm")
@@ -462,10 +468,9 @@ class _LightgbmBoosterWrapper(_SkLearnObjectWrapper):
462
468
  OPENSOURCE_PACKAGE_NAME = OpenSourcePackage.LIGHTGBM
463
469
  def __init__(self, model=None, module_name=None, class_name=None, kwargs=None, model_column_name=None):
464
470
  file_type = "file_fn_lightgbm_booster"
465
-
466
471
  self._model_column_name = model_column_name
467
-
468
472
  self.record_evaluation_result = None
473
+ self._pkgs = ["lightgbm", "scikit-learn", "numpy", "scipy"]
469
474
 
470
475
  if model is not None and isinstance(model, dict) and self._model_column_name in model.keys():
471
476
  self.record_evaluation_result = model["record_evaluation_result"]
@@ -500,10 +505,6 @@ class _LightgbmBoosterWrapper(_SkLearnObjectWrapper):
500
505
  class_obj = getattr(import_module(self.module_name), self.class_name)
501
506
  self.modelObj = class_obj(**self.kwargs)
502
507
 
503
- def deploy(self, model_name, replace_if_exists=False):
504
- raise NotImplementedError("The deploy() function is not yet supported for lightgbm OpensourceML objects. \
505
- Support will be added in future releases.")
506
-
507
508
  @property
508
509
  def model_info(self):
509
510
  """
@@ -662,42 +663,6 @@ class _LightgbmBoosterWrapper(_SkLearnObjectWrapper):
662
663
  """
663
664
  return self.modelObj
664
665
 
665
- def _convert_arguments_to_modelObj(self, args, idx_multi_model=None):
666
- """
667
- Internal function to convert all OpensourceML related objects in arguments to
668
- underlying model objects.
669
- """
670
- if isinstance(args, dict):
671
- new_args = args.copy() # To avoid updating
672
- for k, v in new_args.items():
673
- if isinstance(v, type(self)) or isinstance(v, _LightgbmDatasetWrapper):
674
- if idx_multi_model is None:
675
- # single model. This argument (idx_multi_model) is set only when modelObj
676
- # is multi model.
677
- new_args[k] = v.modelObj
678
- else:
679
- # multi-model. Get appropriate model from modelObj.
680
- new_args[k] = v.modelObj.iloc[idx_multi_model][self._model_column_name]
681
- else:
682
- new_args[k] = v
683
- return new_args
684
-
685
- # If args is tuple, convert all elements to underlying model object.
686
- elif isinstance(args, tuple):
687
- new_args = tuple()
688
- for arg in args:
689
- if isinstance(arg, type(self)) or isinstance(arg, _LightgbmDatasetWrapper):
690
- if idx_multi_model is None:
691
- # single model. This argument is set only when modelObj is single model.
692
- new_args += (arg.modelObj,)
693
- else:
694
- # multi-model. Get appropriate model from modelObj.
695
- new_args += (arg.modelObj.iloc[idx_multi_model][self._model_column_name],)
696
- else:
697
- new_args += (arg,)
698
- return new_args
699
- return args
700
-
701
666
  def __getattr__(self, name):
702
667
  def __run_transform(*c, **kwargs):
703
668
  # Lightgbm predict method takes other keyword arguments along with data related arguments.
@@ -729,6 +694,43 @@ class _LightgbmBoosterWrapper(_SkLearnObjectWrapper):
729
694
  return __run_transform
730
695
  return super().__getattr__(name)
731
696
 
697
+ def _execute_function_locally(self, ten_row_data, feature_columns, label_columns, openml_obj,
698
+ func_name, **kwargs):
699
+ """
700
+ Function which overrides the existing _execute_function_locally method to handle ValueError
701
+ as argument names are different in lightgbm compared to scikit-learn.
702
+ """
703
+ X = numpy.array(ten_row_data)
704
+
705
+ if label_columns:
706
+ n_f = len(feature_columns)
707
+ n_c = len(label_columns)
708
+ y = X[:,n_f : n_f + n_c]
709
+ X = X[:,:n_f]
710
+ # predict() now takes 'y' ("label" lightgbm argument) also for it to return the labels
711
+ # from script. Skipping 'y' in local run if passed.
712
+ # Generally, 'y' is passed to return y along with actual output.
713
+ # Since the actual lightgbm predict() does not have a "label" argument and has other arguments like
714
+ # "start_iteration" etc., the local run in the try block results in a ValueError such as
715
+ # "ValueError: The truth value of an array with more than one element is ambiguous.
716
+ # Use a.any() or a.all()" for "start_iteration" argument because the value for "y" is
717
+ # taken for "start_iteration" positional argument. Hence, skipping y in local run.
718
+ try:
719
+ trans_opt = getattr(openml_obj, func_name)(X, y, **kwargs)
720
+ except TypeError as _:
721
+ # Function which does not accept 'y' like predict_proba() raises error like
722
+ # "predict_proba() takes 2 positional arguments but 3 were given".
723
+ trans_opt = getattr(openml_obj, func_name)(X, **kwargs)
724
+ except ValueError as _:
725
+ trans_opt = getattr(openml_obj, func_name)(X, **kwargs)
726
+ else:
727
+ trans_opt = getattr(openml_obj, func_name)(X, **kwargs)
728
+
729
+ if isinstance(trans_opt, numpy.ndarray) and trans_opt.shape == (X.shape[0],):
730
+ trans_opt = trans_opt.reshape(X.shape[0], 1)
731
+
732
+ return trans_opt
733
+
732
734
  def _transform(self, **kwargs):
733
735
  # Overwriting existing _transform method to handle data related arguments and other
734
736
  # keyword arguments.
@@ -773,16 +775,13 @@ class _LightgbmBoosterWrapper(_SkLearnObjectWrapper):
773
775
  return self.modelObj.__repr__()
774
776
 
775
777
 
776
- class _LighgbmSklearnWrapper(_SkLearnObjectWrapper):
778
+ class _LightgbmSklearnWrapper(_SkLearnObjectWrapper):
777
779
  OPENSOURCE_PACKAGE_NAME = OpenSourcePackage.LIGHTGBM
778
780
  def __init__(self, model=None, module_name=None, class_name=None, kwargs=None):
781
+ self._pkgs = ["lightgbm", "scikit-learn", "numpy", "scipy"]
779
782
  super().__init__(model=model, module_name=module_name, class_name=class_name, kwargs=kwargs)
780
783
  self._scripts_path = os.path.join(_TDML_DIRECTORY, "data", "scripts", "lightgbm")
781
784
 
782
- def deploy(self, model_name, replace_if_exists=False):
783
- raise NotImplementedError("The deploy() function is not yet supported for lightgbm OpensourceML objects. \
784
- Support will be added in future releases.")
785
-
786
785
  def set_params(self, **params):
787
786
  """
788
787
  Please check the description in Docs/OpensourceML/sklearn.py.