teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +315 -2
- teradataml/__init__.py +4 -0
- teradataml/_version.py +1 -1
- teradataml/analytics/analytic_function_executor.py +95 -8
- teradataml/analytics/byom/__init__.py +1 -1
- teradataml/analytics/json_parser/metadata.py +12 -3
- teradataml/analytics/json_parser/utils.py +7 -2
- teradataml/analytics/sqle/__init__.py +5 -1
- teradataml/analytics/table_operator/__init__.py +1 -1
- teradataml/analytics/uaf/__init__.py +1 -1
- teradataml/analytics/utils.py +4 -0
- teradataml/analytics/valib.py +18 -4
- teradataml/automl/__init__.py +51 -6
- teradataml/automl/data_preparation.py +59 -35
- teradataml/automl/data_transformation.py +58 -33
- teradataml/automl/feature_engineering.py +27 -12
- teradataml/automl/model_training.py +73 -46
- teradataml/common/constants.py +88 -29
- teradataml/common/garbagecollector.py +2 -1
- teradataml/common/messagecodes.py +19 -3
- teradataml/common/messages.py +6 -1
- teradataml/common/sqlbundle.py +64 -12
- teradataml/common/utils.py +246 -47
- teradataml/common/warnings.py +11 -0
- teradataml/context/context.py +161 -27
- teradataml/data/amazon_reviews_25.csv +26 -0
- teradataml/data/byom_example.json +11 -0
- teradataml/data/dataframe_example.json +18 -2
- teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
- teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
- teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
- teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
- teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
- teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/Shap.py +34 -6
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
- teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
- teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
- teradataml/data/hnsw_alter_data.csv +5 -0
- teradataml/data/hnsw_data.csv +10 -0
- teradataml/data/jsons/byom/h2opredict.json +1 -1
- teradataml/data/jsons/byom/onnxembeddings.json +266 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
- teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
- teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
- teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
- teradataml/data/medical_readings.csv +101 -0
- teradataml/data/patient_profile.csv +101 -0
- teradataml/data/scripts/lightgbm/dataset.template +157 -0
- teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
- teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
- teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
- teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
- teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
- teradataml/data/target_udt_data.csv +8 -0
- teradataml/data/templates/open_source_ml.json +3 -2
- teradataml/data/teradataml_example.json +8 -0
- teradataml/data/vectordistance_example.json +4 -0
- teradataml/dataframe/copy_to.py +8 -3
- teradataml/dataframe/data_transfer.py +11 -1
- teradataml/dataframe/dataframe.py +1049 -285
- teradataml/dataframe/dataframe_utils.py +152 -20
- teradataml/dataframe/functions.py +578 -35
- teradataml/dataframe/setop.py +11 -6
- teradataml/dataframe/sql.py +185 -16
- teradataml/dbutils/dbutils.py +1049 -115
- teradataml/dbutils/filemgr.py +48 -1
- teradataml/hyperparameter_tuner/optimizer.py +12 -1
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/__init__.py +1 -1
- teradataml/opensource/_base.py +1466 -0
- teradataml/opensource/_class.py +464 -0
- teradataml/opensource/{sklearn/constants.py → _constants.py} +21 -14
- teradataml/opensource/_lightgbm.py +949 -0
- teradataml/opensource/_sklearn.py +1008 -0
- teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +5 -6
- teradataml/options/__init__.py +54 -38
- teradataml/options/configure.py +131 -27
- teradataml/options/display.py +13 -2
- teradataml/plot/axis.py +47 -8
- teradataml/plot/figure.py +33 -0
- teradataml/plot/plot.py +63 -13
- teradataml/scriptmgmt/UserEnv.py +5 -5
- teradataml/scriptmgmt/lls_utils.py +130 -40
- teradataml/store/__init__.py +12 -0
- teradataml/store/feature_store/__init__.py +0 -0
- teradataml/store/feature_store/constants.py +291 -0
- teradataml/store/feature_store/feature_store.py +2318 -0
- teradataml/store/feature_store/models.py +1505 -0
- teradataml/table_operators/Apply.py +32 -18
- teradataml/table_operators/Script.py +3 -1
- teradataml/table_operators/TableOperator.py +3 -1
- teradataml/table_operators/query_generator.py +3 -0
- teradataml/table_operators/table_operator_query_generator.py +3 -1
- teradataml/table_operators/table_operator_util.py +37 -38
- teradataml/table_operators/templates/dataframe_register.template +69 -0
- teradataml/utils/dtypes.py +51 -2
- teradataml/utils/internal_buffer.py +18 -0
- teradataml/utils/validators.py +99 -8
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/METADATA +321 -5
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/RECORD +121 -94
- teradataml/libaed_0_1.dylib +0 -0
- teradataml/libaed_0_1.so +0 -0
- teradataml/opensource/sklearn/__init__.py +0 -1
- teradataml/opensource/sklearn/_class.py +0 -255
- teradataml/opensource/sklearn/_sklearn_wrapper.py +0 -1800
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/zip-safe +0 -0
|
@@ -0,0 +1,464 @@
|
|
|
1
|
+
# ##################################################################
|
|
2
|
+
#
|
|
3
|
+
# Copyright 2023 Teradata. All rights reserved.
|
|
4
|
+
# TERADATA CONFIDENTIAL AND TRADE SECRET
|
|
5
|
+
#
|
|
6
|
+
# Primary Owner: Adithya Avvaru (adithya.avvaru@teradata.com)
|
|
7
|
+
# Secondary Owner: Pankaj Purandare (pankajvinod.purandare@teradata.com)
|
|
8
|
+
#
|
|
9
|
+
# Version: 1.0
|
|
10
|
+
# Function Version: 1.0
|
|
11
|
+
#
|
|
12
|
+
# This file contains classes for Opensource packages like sklearn,
|
|
13
|
+
# lightgbm etc and their corresponding objects.
|
|
14
|
+
#
|
|
15
|
+
# ##################################################################
|
|
16
|
+
|
|
17
|
+
from importlib import import_module
|
|
18
|
+
|
|
19
|
+
from teradataml.opensource._constants import _LIGHTGBM_MODULES, _SKL_MODULES
|
|
20
|
+
from teradataml.opensource._lightgbm import (_LightgbmSklearnWrapper,
|
|
21
|
+
_LightgbmBoosterWrapper,
|
|
22
|
+
_LightgbmDatasetWrapper,
|
|
23
|
+
_LightgbmFunctionWrapper)
|
|
24
|
+
from teradataml.opensource._sklearn import (_SKLearnFunctionWrapper,
|
|
25
|
+
_SkLearnObjectWrapper)
|
|
26
|
+
|
|
27
|
+
|
|
28
|
+
class _OpenSource:
    """
    A class to extend teradataml to other open source packages like scikit-learn,
    spark, pytorch, snowflake etc.

    The attributes "_modules", "_object_wrapper" and "_function_wrapper" are
    initialized to None here; they must be populated (for example by a derived
    class) before classes/functions are accessed through the object.
    """

    def __init__(self):
        # Names of the modules that are searched, in order, for a requested name.
        self._modules = None
        # Wrapper invoked for classes; deploy() and load() delegate to it as well.
        self._object_wrapper = None
        # Wrapper invoked for plain Python functions.
        self._function_wrapper = None

    def _get_module_and_class_instance(self, name):
        """
        DESCRIPTION:
            Internal function to get the module and class instance/function which will
            be passed to object/function wrapper. The modules listed in "_modules" are
            imported and searched in order; the first module exposing "name" is used.

        PARAMETERS:
            name:
                Required Argument.
                Specifies the name of the class or function to look up.
                Types: str

        RETURNS:
            tuple of (module name, class or function object).

        RAISES:
            ValueError if none of the modules exposes "name".
        """
        class_instance = None
        module = None
        for module in self._modules:
            # Import failures are deliberately not caught here; only a missing
            # attribute moves the search on to the next module.
            lib = import_module(module)
            try:
                class_instance = getattr(lib, name)
                break
            except AttributeError:
                continue

        if not class_instance:
            # "__name__" must be used here: a class has no "name" attribute, so
            # the earlier "self.__class__.name" raised AttributeError and masked
            # the intended ValueError.
            raise ValueError(f"The class/function '{name}' does not exist in "
                             f"'{self.__class__.__name__.lower()}' modules.")

        return module, class_instance

    def __getattr__(self, name):
        """
        DESCRIPTION:
            Returns a callable for any attribute that is not found through the
            regular attribute lookup. The lookup of "name" in the configured
            modules is deferred until the returned callable is invoked.

        PARAMETERS:
            name:
                Required Argument.
                Specifies the name of the class or function being accessed.
                Types: str

        RETURNS:
            function, which on invocation returns the result of the function
            wrapper call (for functions) or the object wrapper (for everything else).

        RAISES:
            ValueError, when the returned callable is invoked and "name" does not
            exist in any of the configured modules.
        """

        def __get_module(*c, **kwargs):
            module, class_instance = self._get_module_and_class_instance(name)

            # If the attribute is a function, then return the function object.
            if type(class_instance).__name__ == "function":
                return self._function_wrapper(module_name=module, func_name=name)(*c, **kwargs)

            return self._object_wrapper(module_name=module, class_name=name,
                                        pos_args=c, kwargs=kwargs)

        return __get_module

    def deploy(self, model_name, model, replace_if_exists=False):
        """
        DESCRIPTION:
            Deploys the model to Vantage.

        PARAMETERS:
            model_name:
                Required Argument.
                Specifies the unique name of the model to be deployed.
                Types: str

            model:
                Required Argument.
                Specifies the teradataml supported opensource model object that is to be deployed.
                Currently supported models are:
                    - sklearn
                    - lightgbm
                Types: object

            replace_if_exists:
                Optional Argument.
                Specifies whether to replace the model if a model with the same name already
                exists in Vantage. If this argument is set to False and a model with the same
                name already exists, then the function raises an exception.
                Default Value: False
                Types: bool

        RETURNS:
            The opensource object wrapper.

        RAISES:
            TeradataMlException if model with "model_name" already exists and the argument
            "replace_if_exists" is set to False.

        EXAMPLES:
            ## sklearn examples.

            # Import required packages and create LinearRegression sklearn object.
            >>> from teradataml import td_sklearn
            >>> from sklearn.linear_model import LinearRegression
            >>> model = LinearRegression(normalize=True)

            # Example 1: Deploy the model to Vantage.
            >>> lin_reg = td_sklearn.deploy("linreg_model_ver_1", model)
            Model is saved.
            >>> lin_reg
            LinearRegression(normalize=True)

            # Example 2: Deploy the model to Vantage with the name same as that of model that
            #            already existed in Vantage.
            >>> lin_reg = td_sklearn.deploy("linreg_model_ver_1", model, replace_if_exists=True)
            Model is deleted.
            Model is saved.
            >>> lin_reg
            LinearRegression(normalize=True)

            ## lightgbm examples.

            # Import required packages and create LGBMClassifier lightGBM object.
            >>> from teradataml import td_lightgbm
            >>> import lightgbm as lgb
            >>> model = lgb.LGBMClassifier()

            # Example 1: Deploy the LightGBM model to Vantage.
            >>> lgb_model = td_lightgbm.deploy("lgb_model_ver_1", model)
            Model is saved.
            >>> lgb_model
            LGBMClassifier()

            # Example 2: Deploy the LightGBM model to Vantage with the name same as that of model that
            #            already existed in Vantage.
            >>> lgb_model = td_lightgbm.deploy("lgb_model_ver_1", model, replace_if_exists=True)
            Model is deleted.
            Model is saved.
            >>> lgb_model
            LGBMClassifier()

            # Example 3: Deploy LightGBM model trained locally using train() function to Vantage.
            # Create Dataset object locally, assuming pdf_x and pdf_y are the feature and label pandas
            # DataFrames.
            >>> lgbm_data = lgb.Dataset(data=pdf_x, label=pdf_y, free_raw_data=False)
            >>> lgbm_data
            <lightgbm.basic.Dataset object at ....>

            # Train the model using train() function.
            >>> model = lgb.train(params={}, train_set=lgbm_data, num_boost_round=30, valid_sets=[lgbm_data])
            [LightGBM] [Warning] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000043 seconds.
            You can set `force_row_wise=true` to remove the overhead.
            And if memory is not enough, you can set `force_col_wise=true`.
            [LightGBM] [Info] Total Bins 532
            [LightGBM] [Info] Number of data points in the train set: 400, number of used features: 4
            [1] valid_0's l2: 0.215811
            [2] valid_0's l2: 0.188138
            [3] valid_0's l2: 0.166146
            ...
            ...
            [29] valid_0's l2: 0.042255
            [30] valid_0's l2: 0.0416953

            # Deploy the model to Vantage.
            >>> lgb_model = td_lightgbm.deploy("lgb_model_ver_2", model)
            >>> lgb_model
            <lightgbm.basic.Booster object at ...>

        """
        return self._object_wrapper._deploy(model_name=model_name,
                                            model=model,
                                            replace_if_exists=replace_if_exists)

    def load(self, model_name):
        """
        DESCRIPTION:
            Loads the model from Vantage based on the interface object on which this function
            is called.
            For example, if the model in "model_name" argument is statsmodel model, then this
            function raises exception if the interface object is `td_sklearn`.

        PARAMETERS:
            model_name:
                Required Argument.
                Specifies the name of the model to be loaded.
                Types: str

        RETURNS:
            The opensource object wrapper.

        RAISES:
            TeradataMlException if model with name "model_name" does not exist.

        EXAMPLE:
            # sklearn example.
            >>> from teradataml import td_sklearn
            >>> # Load the model saved in Vantage. Note that the model is saved using
            >>> # `deploy()` of exposed interface object (like `td_sklearn`) or
            >>> # `_OpenSourceObjectWrapper` Object.
            >>> model = td_sklearn.load("linreg_model_ver_1")
            >>> model
            LinearRegression(normalize=True)

            # lightgbm example.
            >>> from teradataml import td_lightgbm
            >>> # Load the model saved in Vantage. Note that the model is saved using
            >>> # `deploy()` of exposed interface object (like `td_lightgbm`) or
            >>> # `_OpenSourceObjectWrapper` Object.
            >>> model = td_lightgbm.load("lgb_model_ver_1")
            >>> model
            LGBMClassifier()
        """
        return self._object_wrapper._load(model_name)
|
|
222
|
+
|
|
223
|
+
|
|
224
|
+
class Sklearn(_OpenSource):
    """
    DESCRIPTION:
        Interface object to access exposed classes and functions of scikit-learn
        opensource package. All the classes and functions can be run and attributes
        can be accessed using the object created by "td_sklearn" interface object.
        Refer Teradata Python Package User Guide for more information about OpenML
        and exposed interface objects.

    PARAMETERS:
        None

    RETURNS:
        None

    EXAMPLES:
        # Load example data.
        >>> load_example_data("openml", ["test_classification", "test_prediction"])
        >>> df = DataFrame("test_classification")
        >>> df.head(3)
                       col2      col3      col4  label
        col1
        -2.560430  0.402232 -1.100742 -2.959588      0
        -3.587546  0.291819 -1.850169 -4.331055      0
        -3.697436  1.576888 -0.461220 -3.598652      0

        >>> df_test = DataFrame("test_prediction")
        >>> df_test.head(3)
                       col2      col3      col4
        col1
        -2.560430  0.402232 -1.100742 -2.959588
        -3.587546  0.291819 -1.850169 -4.331055
        -3.697436  1.576888 -0.461220 -3.598652


        # Get the feature and label data.
        >>> df_x_clasif = df.select(df.columns[:-1])
        >>> df_y_clasif = df.select(df.columns[-1])

        >>> from teradataml import td_sklearn
        >>> dt_cl = td_sklearn.DecisionTreeClassifier(random_state=0)
        >>> dt_cl
        DecisionTreeClassifier(random_state=0)

        # Set the parameters.
        >>> dt_cl.set_params(random_state=2, max_features="sqrt")
        DecisionTreeClassifier(max_features='sqrt', random_state=2)

        # Get the parameters.
        >>> dt_cl.get_params()
        {'ccp_alpha': 0.0,
         'class_weight': None,
         'criterion': 'gini',
         'max_depth': None,
         'max_features': 'sqrt',
         'max_leaf_nodes': None,
         'min_impurity_decrease': 0.0,
         'min_impurity_split': None,
         'min_samples_leaf': 1,
         'min_samples_split': 2,
         'min_weight_fraction_leaf': 0.0,
         'random_state': 2,
         'splitter': 'best'}

        # Train the model using fit().
        >>> dt_cl.fit(df_x_clasif, df_y_clasif)
        DecisionTreeClassifier(max_features='sqrt', random_state=2)

        # Perform prediction.
        >>> dt_cl.predict(df_test)
               col1      col2      col3      col4  decisiontreeclassifier_predict_1
        0  1.105026 -1.949894 -1.537164  0.073171                                 1
        1  1.878349  0.577289  1.795746  2.762539                                 1
        2 -1.130582 -0.020296 -0.710234 -1.440991                                 0
        3 -1.243781  0.280821 -0.437933 -1.379770                                 0
        4 -0.509793  0.492659  0.248207 -0.309591                                 1
        5 -0.345538 -2.296723 -2.811807 -1.993113                                 0
        6  0.709217 -1.481740 -1.247431 -0.109140                                 0
        7 -1.621842  1.713381  0.955084 -0.885921                                 1
        8  2.425481 -0.549892  0.851440  2.689135                                 1
        9  1.780375 -1.749949 -0.900142  1.061262                                 0

        # Perform scoring.
        >>> dt_cl.score(df_x_clasif, df_y_clasif)
           score
        0    1.0

        # Access few attributes.
        >>> dt_cl.classes_
        array([0., 1.])

        >>> dt_cl.feature_importances_
        array([0.06945187, 0.02      , 0.67786339, 0.23268474])

        >>> dt_cl.max_features_
        2
    """

    def __init__(self):
        super().__init__()
        # Plug in the scikit-learn specific wrappers and the list of
        # scikit-learn modules searched for the requested class/function.
        self._function_wrapper = _SKLearnFunctionWrapper
        self._object_wrapper = _SkLearnObjectWrapper
        self._modules = _SKL_MODULES
|
|
326
|
+
|
|
327
|
+
|
|
328
|
+
class Lightgbm(_OpenSource):
    """
    DESCRIPTION:
        Interface object to access exposed classes and functions of lightgbm
        opensource package. All the classes and functions can be run and attributes
        can be accessed using the object created by "td_lightgbm" interface object.
        Refer Teradata Python Package User Guide for more information about OpenML
        and exposed interface objects.

    PARAMETERS:
        None

    RETURNS:
        None

    EXAMPLES:
        # Load example data.
        >>> load_example_data("openml", ["test_classification"])
        >>> df = DataFrame("test_classification")
        >>> df.head(3)
                       col2      col3      col4  label
        col1
        -2.560430  0.402232 -1.100742 -2.959588      0
        -3.587546  0.291819 -1.850169 -4.331055      0
        -3.697436  1.576888 -0.461220 -3.598652      0

        # Get the feature and label data.
        >>> df_x = df.select(df.columns[:-1])
        >>> df_y = df.select(df.columns[-1])

        >>> from teradataml import td_lightgbm

        # Example 1: Train the model using train() function.
        # Create lightgbm Dataset object.
        >>> lgbm_data = td_lightgbm.Dataset(data=df_x, label=df_y, free_raw_data=False)
        >>> lgbm_data
        <lightgbm.basic.Dataset object at ...>

        # Train the model.
        >>> model = td_lightgbm.train(params={}, train_set=lgbm_data, num_boost_round=30, valid_sets=[lgbm_data])
        [LightGBM] [Warning] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000043 seconds.
        You can set `force_row_wise=true` to remove the overhead.
        And if memory is not enough, you can set `force_col_wise=true`.
        [LightGBM] [Info] Total Bins 532
        [LightGBM] [Info] Number of data points in the train set: 400, number of used features: 4
        [1] valid_0's l2: 0.215811
        [2] valid_0's l2: 0.188138
        [3] valid_0's l2: 0.166146
        ...
        ...
        [29] valid_0's l2: 0.042255
        [30] valid_0's l2: 0.0416953
        >>> model
        <lightgbm.basic.Booster object at ...>

        # Example 2: Train the model using LGBMClassifier sklearn object.
        # Create lightgbm sklearn object.
        >>> lgbm_cl = td_lightgbm.LGBMClassifier()
        >>> lgbm_cl
        LGBMClassifier()

        # Fit/train the model using fit() function.
        >>> lgbm_cl.fit(df_x, df_y)
        LGBMClassifier()

        # Perform prediction.
        >>> lgbm_cl.predict(df_x).head(3)
               col1      col2      col3      col4  lgbmclassifier_predict_1
        0  1.105026 -1.949894 -1.537164  0.073171                         1
        1  1.878349  0.577289  1.795746  2.762539                         1
        2 -1.130582 -0.020296 -0.710234 -1.440991                         0

        # Access attributes.
        >>> lgbm_cl.feature_importances_
        array([ 0, 20, 10, 10])
    """

    def __init__(self):
        super().__init__()
        # The Booster wrapper is the initial object wrapper; it is swapped by
        # _assign_object_wrapper() depending on the class being handled.
        self._function_wrapper = _LightgbmFunctionWrapper
        self._object_wrapper = _LightgbmBoosterWrapper
        self._modules = _LIGHTGBM_MODULES

    def _assign_object_wrapper(self, module, class_name):
        """
        Selects the object wrapper matching the given module and class name and
        stores it in "_object_wrapper". The current wrapper is left untouched when
        the combination is not one of the recognized ones.
        """
        if module == "lightgbm.sklearn":
            self._object_wrapper = _LightgbmSklearnWrapper
        elif module == "lightgbm.basic":
            if class_name == "Booster":
                self._object_wrapper = _LightgbmBoosterWrapper
            elif class_name == "Dataset":
                self._object_wrapper = _LightgbmDatasetWrapper

    def __getattr__(self, name):
        """
        Returns a callable for any attribute not found through regular lookup.
        Invoking the callable resolves "name" in the lightgbm modules and hands
        it over to the function wrapper or to the matching object wrapper.
        """

        def __get_module(*args, **kwargs):
            module, target = self._get_module_and_class_instance(name)

            if type(target).__name__ != "function":
                # Classes: map positional arguments to the constructor's
                # parameter names (skipping the first one, i.e. "self").
                init_params = target.__init__.__code__.co_varnames[1:]
                kwargs.update(zip(init_params, args))

                self._assign_object_wrapper(module, name)
                return self._object_wrapper(module_name=module,
                                            class_name=name,
                                            kwargs=kwargs)

            # Functions: map positional arguments to the function's parameter names.
            kwargs.update(zip(target.__code__.co_varnames, args))

            # Callback functions are not executed; only their description is returned.
            if module == "lightgbm.callback":
                return {"module": module, "func_name": name, "kwargs": kwargs}

            wrapped = self._function_wrapper(module_name=module, func_name=name)
            return wrapped(**kwargs)

        return __get_module

    def deploy(self, model_name, model, replace_if_exists=False):
        # No docstring on purpose: the docstring of the parent class already
        # contains the lightgbm examples.
        module = getattr(model, "__module__", None)
        model_class = getattr(model, "__class__", None)
        class_name = None if model_class is None else model_class.__name__

        if class_name is None or module is None:
            raise ValueError("The model object is not supported for deployment.")

        # Pick the wrapper that corresponds to the type of the model being deployed.
        self._assign_object_wrapper(module, class_name)

        wrapper = self._object_wrapper
        return wrapper._deploy(model_name=model_name,
                               model=model,
                               replace_if_exists=replace_if_exists)
|
|
461
|
+
|
|
462
|
+
|
|
463
|
+
# Interface object exposing scikit-learn classes and functions through teradataml.
td_sklearn = Sklearn()
|
|
464
|
+
# Interface object exposing lightgbm classes and functions through teradataml.
td_lightgbm = Lightgbm()
|
|
@@ -13,30 +13,35 @@
|
|
|
13
13
|
#
|
|
14
14
|
# ##################################################################
|
|
15
15
|
|
|
16
|
-
from enum import Enum
|
|
17
|
-
from teradataml import VARCHAR, BLOB
|
|
18
16
|
from dataclasses import dataclass, field
|
|
19
|
-
from
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
17
|
+
from enum import Enum
|
|
18
|
+
from typing import Any, Dict, Optional, Tuple
|
|
19
|
+
|
|
20
|
+
from teradataml import BLOB, VARCHAR
|
|
21
|
+
|
|
22
|
+
# scikit-learn modules listed in the order they are defined here.
_SKL_MODULES = [
    "sklearn.calibration",
    "sklearn.cluster",
    "sklearn.compose",
    "sklearn.covariance",
    "sklearn.decomposition",
    "sklearn.discriminant_analysis",
    "sklearn.dummy",
    "sklearn.ensemble",
    "sklearn.feature_extraction",
    "sklearn.feature_selection",
    "sklearn.gaussian_process",
    "sklearn.impute",
    "sklearn.isotonic",
    "sklearn.kernel_approximation",
    "sklearn.kernel_ridge",
    "sklearn.linear_model",
    "sklearn.manifold",
    "sklearn.mixture",
    "sklearn.model_selection",
    "sklearn.multiclass",
    "sklearn.multioutput",
    "sklearn.naive_bayes",
    "sklearn.neighbors",
    "sklearn.neural_network",
    "sklearn.preprocessing",
    "sklearn.random_projection",
    "sklearn.semi_supervised",
    "sklearn.svm",
    "sklearn.tree",
    "sklearn.pipeline",
    "sklearn.cross_decomposition",
    "sklearn.gaussian_process.kernels",
    "sklearn.metrics",
]

# lightgbm modules listed in the order they are defined here.
_LIGHTGBM_MODULES = [
    "lightgbm.basic",
    "lightgbm.callback",
    "lightgbm.compat",
    "lightgbm.engine",
    "lightgbm.sklearn",
]
# Currently left out of the list above: "lightgbm.cv", "lightgbm.dask",
|
|
31
33
|
|
|
32
34
|
class OpenSourcePackage(Enum):
    """Enumeration of the open source packages, keyed by their package name."""
    SKLEARN = "sklearn"
    LIGHTGBM = "lightgbm"

    @classmethod
    def values(cls):
        """Return the values of all members as a list, in definition order."""
        return list(map(lambda member: member.value, cls))
|
|
38
41
|
|
|
39
42
|
|
|
43
|
+
# Used to ensure that the check for python and python packages is done only
# once per package.
_packages_verified_in_vantage = {}
|
|
44
|
+
|
|
40
45
|
@dataclass
|
|
41
46
|
class OpensourceModels:
|
|
42
47
|
"""Dataclass for Opensource Models details."""
|
|
@@ -46,6 +51,8 @@ class OpensourceModels:
|
|
|
46
51
|
pos_args: Tuple[Any] = tuple() # Positional arguments used for model creation.
|
|
47
52
|
key_args: Dict[str, Any] = field(default_factory=dict) # Keyword arguments used for model creation.
|
|
48
53
|
fit_partition_columns_non_default: Optional[str] = None # Columns used for partitioning.
|
|
54
|
+
osml_module: Optional[str] = None # Module of corresponding wrapper class.
|
|
55
|
+
osml_class: Optional[str] = None # Corresponding wrapper class name.
|
|
49
56
|
|
|
50
57
|
# Model table details used by opensource BYOM.
|
|
51
58
|
_OSML_MODELS_TABLE_NAME = "opensourceml_models"
|