teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic. Click here for more details.

Files changed (126) hide show
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +315 -2
  3. teradataml/__init__.py +4 -0
  4. teradataml/_version.py +1 -1
  5. teradataml/analytics/analytic_function_executor.py +95 -8
  6. teradataml/analytics/byom/__init__.py +1 -1
  7. teradataml/analytics/json_parser/metadata.py +12 -3
  8. teradataml/analytics/json_parser/utils.py +7 -2
  9. teradataml/analytics/sqle/__init__.py +5 -1
  10. teradataml/analytics/table_operator/__init__.py +1 -1
  11. teradataml/analytics/uaf/__init__.py +1 -1
  12. teradataml/analytics/utils.py +4 -0
  13. teradataml/analytics/valib.py +18 -4
  14. teradataml/automl/__init__.py +51 -6
  15. teradataml/automl/data_preparation.py +59 -35
  16. teradataml/automl/data_transformation.py +58 -33
  17. teradataml/automl/feature_engineering.py +27 -12
  18. teradataml/automl/model_training.py +73 -46
  19. teradataml/common/constants.py +88 -29
  20. teradataml/common/garbagecollector.py +2 -1
  21. teradataml/common/messagecodes.py +19 -3
  22. teradataml/common/messages.py +6 -1
  23. teradataml/common/sqlbundle.py +64 -12
  24. teradataml/common/utils.py +246 -47
  25. teradataml/common/warnings.py +11 -0
  26. teradataml/context/context.py +161 -27
  27. teradataml/data/amazon_reviews_25.csv +26 -0
  28. teradataml/data/byom_example.json +11 -0
  29. teradataml/data/dataframe_example.json +18 -2
  30. teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
  31. teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
  32. teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
  33. teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
  34. teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
  35. teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
  36. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
  37. teradataml/data/docs/sqle/docs_17_20/Shap.py +34 -6
  38. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
  39. teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
  40. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  41. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  42. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
  43. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
  44. teradataml/data/hnsw_alter_data.csv +5 -0
  45. teradataml/data/hnsw_data.csv +10 -0
  46. teradataml/data/jsons/byom/h2opredict.json +1 -1
  47. teradataml/data/jsons/byom/onnxembeddings.json +266 -0
  48. teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
  49. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
  50. teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
  51. teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
  52. teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
  53. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  54. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  55. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  56. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  57. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  58. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
  59. teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
  60. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
  61. teradataml/data/medical_readings.csv +101 -0
  62. teradataml/data/patient_profile.csv +101 -0
  63. teradataml/data/scripts/lightgbm/dataset.template +157 -0
  64. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
  65. teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
  66. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
  67. teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
  68. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
  69. teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
  70. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
  71. teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
  72. teradataml/data/target_udt_data.csv +8 -0
  73. teradataml/data/templates/open_source_ml.json +3 -2
  74. teradataml/data/teradataml_example.json +8 -0
  75. teradataml/data/vectordistance_example.json +4 -0
  76. teradataml/dataframe/copy_to.py +8 -3
  77. teradataml/dataframe/data_transfer.py +11 -1
  78. teradataml/dataframe/dataframe.py +1049 -285
  79. teradataml/dataframe/dataframe_utils.py +152 -20
  80. teradataml/dataframe/functions.py +578 -35
  81. teradataml/dataframe/setop.py +11 -6
  82. teradataml/dataframe/sql.py +185 -16
  83. teradataml/dbutils/dbutils.py +1049 -115
  84. teradataml/dbutils/filemgr.py +48 -1
  85. teradataml/hyperparameter_tuner/optimizer.py +12 -1
  86. teradataml/lib/aed_0_1.dll +0 -0
  87. teradataml/opensource/__init__.py +1 -1
  88. teradataml/opensource/_base.py +1466 -0
  89. teradataml/opensource/_class.py +464 -0
  90. teradataml/opensource/{sklearn/constants.py → _constants.py} +21 -14
  91. teradataml/opensource/_lightgbm.py +949 -0
  92. teradataml/opensource/_sklearn.py +1008 -0
  93. teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +5 -6
  94. teradataml/options/__init__.py +54 -38
  95. teradataml/options/configure.py +131 -27
  96. teradataml/options/display.py +13 -2
  97. teradataml/plot/axis.py +47 -8
  98. teradataml/plot/figure.py +33 -0
  99. teradataml/plot/plot.py +63 -13
  100. teradataml/scriptmgmt/UserEnv.py +5 -5
  101. teradataml/scriptmgmt/lls_utils.py +130 -40
  102. teradataml/store/__init__.py +12 -0
  103. teradataml/store/feature_store/__init__.py +0 -0
  104. teradataml/store/feature_store/constants.py +291 -0
  105. teradataml/store/feature_store/feature_store.py +2318 -0
  106. teradataml/store/feature_store/models.py +1505 -0
  107. teradataml/table_operators/Apply.py +32 -18
  108. teradataml/table_operators/Script.py +3 -1
  109. teradataml/table_operators/TableOperator.py +3 -1
  110. teradataml/table_operators/query_generator.py +3 -0
  111. teradataml/table_operators/table_operator_query_generator.py +3 -1
  112. teradataml/table_operators/table_operator_util.py +37 -38
  113. teradataml/table_operators/templates/dataframe_register.template +69 -0
  114. teradataml/utils/dtypes.py +51 -2
  115. teradataml/utils/internal_buffer.py +18 -0
  116. teradataml/utils/validators.py +99 -8
  117. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/METADATA +321 -5
  118. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/RECORD +121 -94
  119. teradataml/libaed_0_1.dylib +0 -0
  120. teradataml/libaed_0_1.so +0 -0
  121. teradataml/opensource/sklearn/__init__.py +0 -1
  122. teradataml/opensource/sklearn/_class.py +0 -255
  123. teradataml/opensource/sklearn/_sklearn_wrapper.py +0 -1800
  124. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/WHEEL +0 -0
  125. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/top_level.txt +0 -0
  126. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/zip-safe +0 -0
@@ -0,0 +1,464 @@
1
+ # ##################################################################
2
+ #
3
+ # Copyright 2023 Teradata. All rights reserved.
4
+ # TERADATA CONFIDENTIAL AND TRADE SECRET
5
+ #
6
+ # Primary Owner: Adithya Avvaru (adithya.avvaru@teradata.com)
7
+ # Secondary Owner: Pankaj Purandare (pankajvinod.purandare@teradata.com)
8
+ #
9
+ # Version: 1.0
10
+ # Function Version: 1.0
11
+ #
12
+ # This file contains classes for Opensource packages like sklearn,
13
+ # lightgbm etc and their corresponding objects.
14
+ #
15
+ # ##################################################################
16
+
17
+ from importlib import import_module
18
+
19
+ from teradataml.opensource._constants import _LIGHTGBM_MODULES, _SKL_MODULES
20
+ from teradataml.opensource._lightgbm import (_LightgbmSklearnWrapper,
21
+ _LightgbmBoosterWrapper,
22
+ _LightgbmDatasetWrapper,
23
+ _LightgbmFunctionWrapper)
24
+ from teradataml.opensource._sklearn import (_SKLearnFunctionWrapper,
25
+ _SkLearnObjectWrapper)
26
+
27
+
28
+ class _OpenSource:
29
+ """
30
+ A class to extend teradataml to other open source packages like scikit-learn,
31
+ spark, pytorch, snowflake etc.
32
+ """
33
+
34
+ def __init__(self):
35
+ self._modules = None
36
+ self._object_wrapper = None
37
+ self._function_wrapper = None
38
+
39
+ def _get_module_and_class_instance(self, name):
40
+ """
41
+ Internal function to get the module and class instance/function which will
42
+ be passed to object/function wrapper.
43
+ """
44
+ class_instance = None
45
+ module = None
46
+ for module in self._modules:
47
+ lib = import_module(module)
48
+ try:
49
+ class_instance = getattr(lib, name)
50
+ break
51
+ except AttributeError as ex:
52
+ continue
53
+
54
+ if not class_instance:
55
+ raise ValueError(f"The class/function '{name}' does not exist in '{self.__class__.name.lower()}' modules.")
56
+
57
+ return module, class_instance
58
+
59
+ def __getattr__(self, name):
60
+
61
+ def __get_module(*c, **kwargs):
62
+ module, class_instance = self._get_module_and_class_instance(name)
63
+
64
+ # If the attribute is a function, then return the function object.
65
+ if type(class_instance).__name__ == "function":
66
+ return self._function_wrapper(module_name=module, func_name=name)(*c, **kwargs)
67
+
68
+ return self._object_wrapper(module_name=module, class_name=name,
69
+ pos_args=c, kwargs=kwargs)
70
+
71
+ return __get_module
72
+
73
+ def deploy(self, model_name, model, replace_if_exists=False):
74
+ """
75
+ DESCRIPTION:
76
+ Deploys the model to Vantage.
77
+
78
+ PARAMETERS:
79
+ model_name:
80
+ Required Argument.
81
+ Specifies the unique name of the model to be deployed.
82
+ Types: str
83
+
84
+ model:
85
+ Required Argument.
86
+ Specifies the teradataml supported opensource model object that is to be deployed.
87
+ Currently supported models are:
88
+ - sklearn
89
+ - lightgbm
90
+ Types: object
91
+
92
+ replace_if_exists:
93
+ Optional Argument.
94
+ Specifies whether to replace the model if a model with the same name already
95
+ exists in Vantage. If this argument is set to False and a model with the same
96
+ name already exists, then the function raises an exception.
97
+ Default Value: False
98
+ Types: bool
99
+
100
+ RETURNS:
101
+ The opensource object wrapper.
102
+
103
+ RAISES:
104
+ TeradataMLException if model with "model_name" already exists and the argument
105
+ "replace_if_exists" is set to False.
106
+
107
+ EXAMPLES:
108
+ ## sklearn examples.
109
+
110
+ # Import required packages and create LinearRegression sklearn object.
111
+ >>> from teradataml import td_sklearn
112
+ >>> from sklearn.linear_model import LinearRegression
113
+ >>> model = LinearRegression(normalize=True)
114
+
115
+ # Example 1: Deploy the model to Vantage.
116
+ >>> lin_reg = td_sklearn.deploy("linreg_model_ver_1", model)
117
+ Model is saved.
118
+ >>> lin_reg
119
+ LinearRegression(normalize=True)
120
+
121
+ # Example 2: Deploy the model to Vantage with the name same as that of model that
122
+ # already existed in Vantage.
123
+ >>> lin_reg = td_sklearn.deploy("linreg_model_ver_1", model, replace_if_exists=True)
124
+ Model is deleted.
125
+ Model is saved.
126
+ >>> lin_reg
127
+ LinearRegression(normalize=True)
128
+
129
+ ## lightgbm examples.
130
+
131
+ # Import required packages and create LGBMClassifier lightGBM object.
132
+ >>> from teradataml import td_lightgbm
133
+ >>> import lightgbm as lgb
134
+ >>> model = lgb.LGBMClassifier()
135
+
136
+ # Example 1: Deploy the LightGBM model to Vantage.
137
+ >>> lgb_model = td_lightgbm.deploy("lgb_model_ver_1", model)
138
+ Model is saved.
139
+ >>> lgb_model
140
+ LGBMClassifier()
141
+
142
+ # Example 2: Deploy the LightGBM model to Vantage with the name same as that of model that
143
+ # already existed in Vantage.
144
+ >>> lgb_model = td_lightgbm.deploy("lgb_model_ver_1", model, replace_if_exists=True)
145
+ Model is deleted.
146
+ Model is saved.
147
+ >>> lgb_model
148
+ LGBMClassifier()
149
+
150
+ # Example 3: Deploy LightGBM model trained locally using train() function to Vantage.
151
+ # Create Dataset object locally, assuming pdf_x and pdf_y are the feature and label pandas
152
+ # DataFrames.
153
+ >>> lgbm_data = lgb.Dataset(data=pdf_x, label=pdf_y, free_raw_data=False)
154
+ >>> lgbm_data
155
+ <lightgbm.basic.Dataset object at ....>
156
+
157
+ # Train the model using train() function.
158
+ >>> model = lgb.train(params={}, train_set=lgbm_data, num_boost_round=30, valid_sets=[lgbm_data])
159
+ [LightGBM] [Warning] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000043 seconds.
160
+ You can set `force_row_wise=true` to remove the overhead.
161
+ And if memory is not enough, you can set `force_col_wise=true`.
162
+ [LightGBM] [Info] Total Bins 532
163
+ [LightGBM] [Info] Number of data points in the train set: 400, number of used features: 4
164
+ [1] valid_0's l2: 0.215811
165
+ [2] valid_0's l2: 0.188138
166
+ [3] valid_0's l2: 0.166146
167
+ ...
168
+ ...
169
+ [29] valid_0's l2: 0.042255
170
+ [30] valid_0's l2: 0.0416953
171
+
172
+ # Deploy the model to Vantage.
173
+ >>> lgb_model = td_lightgbm.deploy("lgb_model_ver_2", model)
174
+ >>> lgb_model
175
+ <lightgbm.basic.Booster object at ...>
176
+
177
+ """
178
+ return self._object_wrapper._deploy(model_name=model_name,
179
+ model=model,
180
+ replace_if_exists=replace_if_exists)
181
+
182
+ def load(self, model_name):
183
+ """
184
+ DESCRIPTION:
185
+ Loads the model from Vantage based on the interface object on which this function
186
+ is called.
187
+ For example, if the model in "model_name" argument is statsmodel model, then this
188
+ function raises exception if the interface object is `td_sklearn`.
189
+
190
+ PARAMETERS:
191
+ model_name:
192
+ Required Argument.
193
+ Specifies the name of the model to be loaded.
194
+ Types: str
195
+
196
+ RETURNS:
197
+ The opensource object wrapper.
198
+
199
+ RAISES:
200
+ TeradataMlException if model with name "model_name" does not exist.
201
+
202
+ EXAMPLE:
203
+ # sklearn example.
204
+ >>> from teradataml import td_sklearn
205
+ >>> # Load the model saved in Vantage. Note that the model is saved using
206
+ >>> # `deploy()` of exposed interface object (like `td_sklearn`) or
207
+ >>> # `_OpenSourceObjectWrapper` Object.
208
+ >>> model = td_sklearn.load("linreg_model_ver_1")
209
+ >>> model
210
+ LinearRegression(normalize=True)
211
+
212
+ # lightgbm example.
213
+ >>> from teradataml import td_lightgbm
214
+ >>> # Load the model saved in Vantage. Note that the model is saved using
215
+ >>> # `deploy()` of exposed interface object (like `td_lightgbm`) or
216
+ >>> # `_OpenSourceObjectWrapper` Object.
217
+ >>> model = td_lightgbm.load("lgb_model_ver_1")
218
+ >>> model
219
+ LGBMClassifier()
220
+ """
221
+ return self._object_wrapper._load(model_name)
222
+
223
+
224
class Sklearn(_OpenSource):
    """
    DESCRIPTION:
        Interface object to access exposed classes and functions of scikit-learn
        opensource package. All the classes and functions can be run and attributes
        can be accessed using the object created by "td_sklearn" interface object.
        Refer Teradata Python Package User Guide for more information about OpenML
        and exposed interface objects.

    PARAMETERS:
        None

    RETURNS:
        None

    EXAMPLES:
        # Load example data.
        >>> load_example_data("openml", ["test_classification", "test_prediction"])
        >>> df = DataFrame("test_classification")
        >>> df.head(3)
                       col2      col3      col4  label
        col1
        -2.560430  0.402232 -1.100742 -2.959588      0
        -3.587546  0.291819 -1.850169 -4.331055      0
        -3.697436  1.576888 -0.461220 -3.598652      0

        >>> df_test = DataFrame("test_prediction")
        >>> df_test.head(3)
                       col2      col3      col4
        col1
        -2.560430  0.402232 -1.100742 -2.959588
        -3.587546  0.291819 -1.850169 -4.331055
        -3.697436  1.576888 -0.461220 -3.598652


        # Get the feature and label data.
        >>> df_x_clasif = df.select(df.columns[:-1])
        >>> df_y_clasif = df.select(df.columns[-1])

        >>> from teradataml import td_sklearn
        >>> dt_cl = td_sklearn.DecisionTreeClassifier(random_state=0)
        >>> dt_cl
        DecisionTreeClassifier(random_state=0)

        # Set the parameters.
        >>> dt_cl.set_params(random_state=2, max_features="sqrt")
        DecisionTreeClassifier(max_features='sqrt', random_state=2)

        # Get the parameters.
        >>> dt_cl.get_params()
        {'ccp_alpha': 0.0,
         'class_weight': None,
         'criterion': 'gini',
         'max_depth': None,
         'max_features': 'sqrt',
         'max_leaf_nodes': None,
         'min_impurity_decrease': 0.0,
         'min_impurity_split': None,
         'min_samples_leaf': 1,
         'min_samples_split': 2,
         'min_weight_fraction_leaf': 0.0,
         'random_state': 2,
         'splitter': 'best'}

        # Train the model using fit().
        >>> dt_cl.fit(df_x_clasif, df_y_clasif)
        DecisionTreeClassifier(max_features='sqrt', random_state=2)

        # Perform prediction.
        >>> dt_cl.predict(df_test)
               col1      col2      col3      col4  decisiontreeclassifier_predict_1
        0  1.105026 -1.949894 -1.537164  0.073171                                 1
        1  1.878349  0.577289  1.795746  2.762539                                 1
        2 -1.130582 -0.020296 -0.710234 -1.440991                                 0
        3 -1.243781  0.280821 -0.437933 -1.379770                                 0
        4 -0.509793  0.492659  0.248207 -0.309591                                 1
        5 -0.345538 -2.296723 -2.811807 -1.993113                                 0
        6  0.709217 -1.481740 -1.247431 -0.109140                                 0
        7 -1.621842  1.713381  0.955084 -0.885921                                 1
        8  2.425481 -0.549892  0.851440  2.689135                                 1
        9  1.780375 -1.749949 -0.900142  1.061262                                 0

        # Perform scoring.
        >>> dt_cl.score(df_x_clasif, df_y_clasif)
           score
        0    1.0

        # Access few attributes.
        >>> dt_cl.classes_
        array([0., 1.])

        >>> dt_cl.feature_importances_
        array([0.06945187, 0.02      , 0.67786339, 0.23268474])

        >>> dt_cl.max_features_
        2
    """

    def __init__(self):
        super().__init__()
        # Wrappers applied to looked-up functions and classes respectively.
        self._function_wrapper = _SKLearnFunctionWrapper
        self._object_wrapper = _SkLearnObjectWrapper
        # Modules searched, in order, when an attribute is requested.
        self._modules = _SKL_MODULES
326
+
327
+
328
class Lightgbm(_OpenSource):
    """
    DESCRIPTION:
        Interface object to access exposed classes and functions of lightgbm
        opensource package. All the classes and functions can be run and attributes
        can be accessed using the object created by "td_lightgbm" interface object.
        Refer Teradata Python Package User Guide for more information about OpenML
        and exposed interface objects.

    PARAMETERS:
        None

    RETURNS:
        None

    EXAMPLES:
        # Load example data.
        >>> load_example_data("openml", ["test_classification"])
        >>> df = DataFrame("test_classification")
        >>> df.head(3)
                       col2      col3      col4  label
        col1
        -2.560430  0.402232 -1.100742 -2.959588      0
        -3.587546  0.291819 -1.850169 -4.331055      0
        -3.697436  1.576888 -0.461220 -3.598652      0

        # Get the feature and label data.
        >>> df_x = df.select(df.columns[:-1])
        >>> df_y = df.select(df.columns[-1])

        >>> from teradataml import td_lightgbm

        # Example 1: Train the model using train() function.
        # Create lightgbm Dataset object.
        >>> lgbm_data = td_lightgbm.Dataset(data=df_x, label=df_y, free_raw_data=False)
        >>> lgbm_data
        <lightgbm.basic.Dataset object at ...>

        # Train the model.
        >>> model = td_lightgbm.train(params={}, train_set=lgbm_data, num_boost_round=30, valid_sets=[lgbm_data])
        [LightGBM] [Warning] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000043 seconds.
        You can set `force_row_wise=true` to remove the overhead.
        And if memory is not enough, you can set `force_col_wise=true`.
        [LightGBM] [Info] Total Bins 532
        [LightGBM] [Info] Number of data points in the train set: 400, number of used features: 4
        [1]	valid_0's l2: 0.215811
        [2]	valid_0's l2: 0.188138
        [3]	valid_0's l2: 0.166146
        ...
        ...
        [29]	valid_0's l2: 0.042255
        [30]	valid_0's l2: 0.0416953
        >>> model
        <lightgbm.basic.Booster object at ...>

        # Example 2: Train the model using LGBMClassifier sklearn object.
        # Create lightgbm sklearn object.
        >>> lgbm_cl = td_lightgbm.LGBMClassifier()
        >>> lgbm_cl
        LGBMClassifier()

        # Fit/train the model using fit() function.
        >>> lgbm_cl.fit(df_x, df_y)
        LGBMClassifier()

        # Perform prediction.
        >>> lgbm_cl.predict(df_x).head(3)
               col1      col2      col3      col4  lgbmclassifier_predict_1
        0  1.105026 -1.949894 -1.537164  0.073171                         1
        1  1.878349  0.577289  1.795746  2.762539                         1
        2 -1.130582 -0.020296 -0.710234 -1.440991                         0

        # Access attributes.
        >>> lgbm_cl.feature_importances_
        array([ 0, 20, 10, 10])
    """

    def __init__(self):
        super().__init__()
        self._function_wrapper = _LightgbmFunctionWrapper
        # Booster wrapper is the default until a lookup or deploy() selects another.
        self._object_wrapper = _LightgbmBoosterWrapper
        self._modules = _LIGHTGBM_MODULES

    def _assign_object_wrapper(self, module, class_name):
        """
        Selects the object wrapper matching the given module and class name and
        stores it on this instance. Combinations not listed below leave the
        currently stored wrapper untouched.

        NOTE: the selection is kept as instance state, so later calls that read
        "_object_wrapper" use whichever wrapper was assigned most recently.
        """
        if module == "lightgbm.sklearn":
            self._object_wrapper = _LightgbmSklearnWrapper
        elif module == "lightgbm.basic":
            if class_name == "Booster":
                self._object_wrapper = _LightgbmBoosterWrapper
            elif class_name == "Dataset":
                self._object_wrapper = _LightgbmDatasetWrapper

    def __getattr__(self, name):
        """
        Returns a callable that resolves "name" in the lightgbm modules when
        invoked. Positional arguments are converted to keyword arguments by
        pairing them with the target's parameter names.
        """

        def _invoke(*pos_args, **kwargs):
            module, target = self._get_module_and_class_instance(name)

            if type(target).__name__ != "function":
                # Class: pair positionals with __init__ parameter names, skipping "self".
                init_params = target.__init__.__code__.co_varnames[1:]
                kwargs.update(zip(init_params, pos_args))

                wrapper_args = {"module_name": module, "class_name": name, "kwargs": kwargs}
                self._assign_object_wrapper(module, name)
                return self._object_wrapper(**wrapper_args)

            # Function: pair positionals with the function's own parameter names.
            kwargs.update(zip(target.__code__.co_varnames, pos_args))

            # Callback functions are not executed here; a description is returned instead.
            if module == "lightgbm.callback":
                return {"module": module, "func_name": name, "kwargs": kwargs}

            wrapped = self._function_wrapper(module_name=module, func_name=name)
            return wrapped(**kwargs)

        return _invoke

    def deploy(self, model_name, model, replace_if_exists=False):
        # Docstring of parent class also contain examples of lightgbm.
        module = getattr(model, "__module__", None)
        if hasattr(model, "__class__"):
            class_name = model.__class__.__name__
        else:
            class_name = None

        if module is None or class_name is None:
            raise ValueError("The model object is not supported for deployment.")

        # Pick the wrapper that matches the concrete model type before deploying.
        self._assign_object_wrapper(module, class_name)

        return self._object_wrapper._deploy(model_name=model_name,
                                            model=model,
                                            replace_if_exists=replace_if_exists)
461
+
462
+
463
# Module-level interface objects, created once when this module is imported.
td_sklearn = Sklearn()
td_lightgbm = Lightgbm()
@@ -13,30 +13,35 @@
13
13
  #
14
14
  # ##################################################################
15
15
 
16
- from enum import Enum
17
- from teradataml import VARCHAR, BLOB
18
16
  from dataclasses import dataclass, field
19
- from typing import Any, Dict, Tuple, Optional, List
20
-
21
-
22
- _MODULES = ["sklearn.calibration", "sklearn.cluster", "sklearn.compose", "sklearn.covariance",
23
- "sklearn.decomposition", "sklearn.discriminant_analysis",
24
- "sklearn.dummy", "sklearn.ensemble", "sklearn.feature_extraction", "sklearn.feature_selection",
25
- "sklearn.gaussian_process", "sklearn.impute", "sklearn.isotonic", "sklearn.kernel_approximation",
26
- "sklearn.kernel_ridge", "sklearn.linear_model", "sklearn.manifold", "sklearn.mixture",
27
- "sklearn.model_selection", "sklearn.multiclass", "sklearn.multioutput", "sklearn.naive_bayes",
28
- "sklearn.neighbors", "sklearn.neural_network", "sklearn.preprocessing", "sklearn.random_projection",
29
- "sklearn.semi_supervised", "sklearn.svm", "sklearn.tree", "sklearn.pipeline", "sklearn.cross_decomposition",
30
- "sklearn.gaussian_process.kernels", "sklearn.metrics"]
17
+ from enum import Enum
18
+ from typing import Any, Dict, Optional, Tuple
19
+
20
+ from teradataml import BLOB, VARCHAR
21
+
22
+ _SKL_MODULES = ["sklearn.calibration", "sklearn.cluster", "sklearn.compose", "sklearn.covariance",
23
+ "sklearn.decomposition", "sklearn.discriminant_analysis",
24
+ "sklearn.dummy", "sklearn.ensemble", "sklearn.feature_extraction", "sklearn.feature_selection",
25
+ "sklearn.gaussian_process", "sklearn.impute", "sklearn.isotonic", "sklearn.kernel_approximation",
26
+ "sklearn.kernel_ridge", "sklearn.linear_model", "sklearn.manifold", "sklearn.mixture",
27
+ "sklearn.model_selection", "sklearn.multiclass", "sklearn.multioutput", "sklearn.naive_bayes",
28
+ "sklearn.neighbors", "sklearn.neural_network", "sklearn.preprocessing", "sklearn.random_projection",
29
+ "sklearn.semi_supervised", "sklearn.svm", "sklearn.tree", "sklearn.pipeline", "sklearn.cross_decomposition",
30
+ "sklearn.gaussian_process.kernels", "sklearn.metrics"]
31
+ _LIGHTGBM_MODULES = ["lightgbm.basic", "lightgbm.callback", "lightgbm.compat", "lightgbm.engine", "lightgbm.sklearn"]
32
+ # "lightgbm.cv", "lightgbm.dask",
31
33
 
32
34
  class OpenSourcePackage(Enum):
33
35
  SKLEARN = "sklearn"
36
+ LIGHTGBM = "lightgbm"
34
37
 
35
38
  @classmethod
36
39
  def values(cls):
37
40
  return [item.value for item in cls]
38
41
 
39
42
 
43
+ _packages_verified_in_vantage = {} # Used to ensure check for python and python packages done only once per package.
44
+
40
45
  @dataclass
41
46
  class OpensourceModels:
42
47
  """Dataclass for Opensource Models details."""
@@ -46,6 +51,8 @@ class OpensourceModels:
46
51
  pos_args: Tuple[Any] = tuple() # Positional arguments used for model creation.
47
52
  key_args: Dict[str, Any] = field(default_factory=dict) # Keyword arguments used for model creation.
48
53
  fit_partition_columns_non_default: Optional[str] = None # Columns used for partitioning.
54
+ osml_module: Optional[str] = None # Module of corresponding wrapper class.
55
+ osml_class: Optional[str] = None # Corresponding wrapper class name.
49
56
 
50
57
  # Model table details used by opensource BYOM.
51
58
  _OSML_MODELS_TABLE_NAME = "opensourceml_models"