teradataml 20.0.0.3__py3-none-any.whl → 20.0.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +193 -1
- teradataml/__init__.py +2 -1
- teradataml/_version.py +2 -2
- teradataml/analytics/analytic_function_executor.py +25 -18
- teradataml/analytics/byom/__init__.py +1 -1
- teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
- teradataml/analytics/sqle/__init__.py +20 -2
- teradataml/analytics/utils.py +15 -1
- teradataml/analytics/valib.py +18 -4
- teradataml/automl/__init__.py +341 -112
- teradataml/automl/autodataprep/__init__.py +471 -0
- teradataml/automl/data_preparation.py +84 -42
- teradataml/automl/data_transformation.py +69 -33
- teradataml/automl/feature_engineering.py +76 -9
- teradataml/automl/feature_exploration.py +639 -25
- teradataml/automl/model_training.py +35 -14
- teradataml/clients/auth_client.py +2 -2
- teradataml/common/__init__.py +1 -2
- teradataml/common/constants.py +122 -63
- teradataml/common/messagecodes.py +14 -3
- teradataml/common/messages.py +8 -4
- teradataml/common/sqlbundle.py +40 -10
- teradataml/common/utils.py +366 -74
- teradataml/common/warnings.py +11 -0
- teradataml/context/context.py +348 -86
- teradataml/data/amazon_reviews_25.csv +26 -0
- teradataml/data/apriori_example.json +22 -0
- teradataml/data/byom_example.json +11 -0
- teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
- teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
- teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
- teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
- teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
- teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
- teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
- teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
- teradataml/data/docs/sqle/docs_17_20/Shap.py +28 -6
- teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
- teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
- teradataml/data/hnsw_alter_data.csv +5 -0
- teradataml/data/hnsw_data.csv +10 -0
- teradataml/data/jsons/byom/h2opredict.json +1 -1
- teradataml/data/jsons/byom/onnxembeddings.json +266 -0
- teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
- teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
- teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
- teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
- teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
- teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
- teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
- teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
- teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +2 -2
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +3 -3
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +6 -6
- teradataml/data/ner_dict.csv +8 -0
- teradataml/data/ner_input_eng.csv +7 -0
- teradataml/data/ner_rule.csv +5 -0
- teradataml/data/pos_input.csv +40 -0
- teradataml/data/tdnerextractor_example.json +14 -0
- teradataml/data/teradataml_example.json +21 -0
- teradataml/data/textmorph_example.json +5 -0
- teradataml/data/to_num_data.csv +4 -0
- teradataml/data/tochar_data.csv +5 -0
- teradataml/data/trans_dense.csv +16 -0
- teradataml/data/trans_sparse.csv +55 -0
- teradataml/data/vectordistance_example.json +1 -1
- teradataml/dataframe/copy_to.py +45 -29
- teradataml/dataframe/data_transfer.py +72 -46
- teradataml/dataframe/dataframe.py +642 -166
- teradataml/dataframe/dataframe_utils.py +167 -22
- teradataml/dataframe/functions.py +135 -20
- teradataml/dataframe/setop.py +11 -6
- teradataml/dataframe/sql.py +330 -78
- teradataml/dbutils/dbutils.py +556 -140
- teradataml/dbutils/filemgr.py +14 -10
- teradataml/hyperparameter_tuner/optimizer.py +12 -1
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/{sklearn/_sklearn_wrapper.py → _base.py} +168 -1013
- teradataml/opensource/_class.py +141 -17
- teradataml/opensource/{constants.py → _constants.py} +7 -3
- teradataml/opensource/_lightgbm.py +52 -53
- teradataml/opensource/_sklearn.py +1008 -0
- teradataml/opensource/_wrapper_utils.py +5 -5
- teradataml/options/__init__.py +47 -15
- teradataml/options/configure.py +103 -26
- teradataml/options/display.py +13 -2
- teradataml/plot/axis.py +47 -8
- teradataml/plot/figure.py +33 -0
- teradataml/plot/plot.py +63 -13
- teradataml/scriptmgmt/UserEnv.py +307 -40
- teradataml/scriptmgmt/lls_utils.py +428 -145
- teradataml/store/__init__.py +2 -3
- teradataml/store/feature_store/feature_store.py +102 -7
- teradataml/table_operators/Apply.py +48 -19
- teradataml/table_operators/Script.py +23 -2
- teradataml/table_operators/TableOperator.py +3 -1
- teradataml/table_operators/table_operator_util.py +58 -9
- teradataml/utils/dtypes.py +49 -1
- teradataml/utils/internal_buffer.py +38 -0
- teradataml/utils/validators.py +377 -62
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/METADATA +200 -4
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/RECORD +146 -112
- teradataml/data/SQL_Fundamentals.pdf +0 -0
- teradataml/libaed_0_1.dylib +0 -0
- teradataml/libaed_0_1.so +0 -0
- teradataml/opensource/sklearn/__init__.py +0 -0
- teradataml/store/vector_store/__init__.py +0 -1586
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/zip-safe +0 -0
teradataml/opensource/_class.py
CHANGED
|
@@ -15,10 +15,14 @@
|
|
|
15
15
|
# ##################################################################
|
|
16
16
|
|
|
17
17
|
from importlib import import_module
|
|
18
|
-
|
|
19
|
-
from teradataml.opensource.
|
|
20
|
-
|
|
21
|
-
|
|
18
|
+
|
|
19
|
+
from teradataml.opensource._constants import _LIGHTGBM_MODULES, _SKL_MODULES
|
|
20
|
+
from teradataml.opensource._lightgbm import (_LightgbmSklearnWrapper,
|
|
21
|
+
_LightgbmBoosterWrapper,
|
|
22
|
+
_LightgbmDatasetWrapper,
|
|
23
|
+
_LightgbmFunctionWrapper)
|
|
24
|
+
from teradataml.opensource._sklearn import (_SKLearnFunctionWrapper,
|
|
25
|
+
_SkLearnObjectWrapper)
|
|
22
26
|
|
|
23
27
|
|
|
24
28
|
class _OpenSource:
|
|
@@ -82,6 +86,7 @@ class _OpenSource:
|
|
|
82
86
|
Specifies the teradataml supported opensource model object that is to be deployed.
|
|
83
87
|
Currently supported models are:
|
|
84
88
|
- sklearn
|
|
89
|
+
- lightgbm
|
|
85
90
|
Types: object
|
|
86
91
|
|
|
87
92
|
replace_if_exists:
|
|
@@ -100,6 +105,9 @@ class _OpenSource:
|
|
|
100
105
|
"replace_if_exists" is set to False.
|
|
101
106
|
|
|
102
107
|
EXAMPLES:
|
|
108
|
+
## sklearn examples.
|
|
109
|
+
|
|
110
|
+
# Import required packages and create LinearRegression sklearn object.
|
|
103
111
|
>>> from teradataml import td_sklearn
|
|
104
112
|
>>> from sklearn.linear_model import LinearRegression
|
|
105
113
|
>>> model = LinearRegression(normalize=True)
|
|
@@ -118,6 +126,53 @@ class _OpenSource:
|
|
|
118
126
|
>>> lin_reg
|
|
119
127
|
LinearRegression(normalize=True)
|
|
120
128
|
|
|
129
|
+
## lightgbm examples.
|
|
130
|
+
|
|
131
|
+
# Import required packages and create LGBMClassifier lightGBM object.
|
|
132
|
+
>>> from teradataml import td_lightgbm
|
|
133
|
+
>>> import lightgbm as lgb
|
|
134
|
+
>>> model = lgb.LGBMClassifier()
|
|
135
|
+
|
|
136
|
+
# Example 1: Deploy the LightGBM model to Vantage.
|
|
137
|
+
>>> lgb_model = td_lightgbm.deploy("lgb_model_ver_1", model)
|
|
138
|
+
Model is saved.
|
|
139
|
+
>>> lgb_model
|
|
140
|
+
LGBMClassifier()
|
|
141
|
+
|
|
142
|
+
# Example 2: Deploy the LightGBM model to Vantage with the same name as a model that
|
|
143
|
+
# already existed in Vantage.
|
|
144
|
+
>>> lgb_model = td_lightgbm.deploy("lgb_model_ver_1", model, replace_if_exists=True)
|
|
145
|
+
Model is deleted.
|
|
146
|
+
Model is saved.
|
|
147
|
+
>>> lgb_model
|
|
148
|
+
LGBMClassifier()
|
|
149
|
+
|
|
150
|
+
# Example 3: Deploy LightGBM model trained locally using train() function to Vantage.
|
|
151
|
+
# Create Dataset object locally, assuming pdf_x and pdf_y are the feature and label pandas
|
|
152
|
+
# DataFrames.
|
|
153
|
+
>>> lgbm_data = lgb.Dataset(data=pdf_x, label=pdf_y, free_raw_data=False)
|
|
154
|
+
>>> lgbm_data
|
|
155
|
+
<lightgbm.basic.Dataset object at ....>
|
|
156
|
+
|
|
157
|
+
# Train the model using train() function.
|
|
158
|
+
>>> model = lgb.train(params={}, train_set=lgbm_data, num_boost_round=30, valid_sets=[lgbm_data])
|
|
159
|
+
[LightGBM] [Warning] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000043 seconds.
|
|
160
|
+
You can set `force_row_wise=true` to remove the overhead.
|
|
161
|
+
And if memory is not enough, you can set `force_col_wise=true`.
|
|
162
|
+
[LightGBM] [Info] Total Bins 532
|
|
163
|
+
[LightGBM] [Info] Number of data points in the train set: 400, number of used features: 4
|
|
164
|
+
[1] valid_0's l2: 0.215811
|
|
165
|
+
[2] valid_0's l2: 0.188138
|
|
166
|
+
[3] valid_0's l2: 0.166146
|
|
167
|
+
...
|
|
168
|
+
...
|
|
169
|
+
[29] valid_0's l2: 0.042255
|
|
170
|
+
[30] valid_0's l2: 0.0416953
|
|
171
|
+
|
|
172
|
+
# Deploy the model to Vantage.
|
|
173
|
+
>>> lgb_model = td_lightgbm.deploy("lgb_model_ver_2", model)
|
|
174
|
+
>>> lgb_model
|
|
175
|
+
<lightgbm.basic.Booster object at ...>
|
|
121
176
|
|
|
122
177
|
"""
|
|
123
178
|
return self._object_wrapper._deploy(model_name=model_name,
|
|
@@ -145,6 +200,7 @@ class _OpenSource:
|
|
|
145
200
|
TeradataMlException if model with name "model_name" does not exist.
|
|
146
201
|
|
|
147
202
|
EXAMPLE:
|
|
203
|
+
# sklearn example.
|
|
148
204
|
>>> from teradataml import td_sklearn
|
|
149
205
|
>>> # Load the model saved in Vantage. Note that the model is saved using
|
|
150
206
|
>>> # `deploy()` of exposed interface object (like `td_sklearn`) or
|
|
@@ -152,6 +208,15 @@ class _OpenSource:
|
|
|
152
208
|
>>> model = td_sklearn.load("linreg_model_ver_1")
|
|
153
209
|
>>> model
|
|
154
210
|
LinearRegression(normalize=True)
|
|
211
|
+
|
|
212
|
+
# lightgbm example.
|
|
213
|
+
>>> from teradataml import td_lightgbm
|
|
214
|
+
>>> # Load the model saved in Vantage. Note that the model is saved using
|
|
215
|
+
>>> # `deploy()` of exposed interface object (like `td_lightgbm`) or
|
|
216
|
+
>>> # `_OpenSourceObjectWrapper` Object.
|
|
217
|
+
>>> model = td_lightgbm.load("lgb_model_ver_1")
|
|
218
|
+
>>> model
|
|
219
|
+
LGBMClassifier()
|
|
155
220
|
"""
|
|
156
221
|
return self._object_wrapper._load(model_name)
|
|
157
222
|
|
|
@@ -290,18 +355,73 @@ class Lightgbm(_OpenSource):
|
|
|
290
355
|
>>> df_x = df.select(df.columns[:-1])
|
|
291
356
|
>>> df_y = df.select(df.columns[-1])
|
|
292
357
|
|
|
358
|
+
>>> from teradataml import td_lightgbm
|
|
359
|
+
|
|
360
|
+
# Example 1: Train the model using train() function.
|
|
293
361
|
# Create lightgbm Dataset object.
|
|
294
362
|
>>> lgbm_data = td_lightgbm.Dataset(data=df_x, label=df_y, free_raw_data=False)
|
|
295
363
|
>>> lgbm_data
|
|
296
|
-
<lightgbm.basic.Dataset object at
|
|
364
|
+
<lightgbm.basic.Dataset object at ...>
|
|
365
|
+
|
|
366
|
+
# Train the model.
|
|
367
|
+
>>> model = td_lightgbm.train(params={}, train_set=lgbm_data, num_boost_round=30, valid_sets=[lgbm_data])
|
|
368
|
+
[LightGBM] [Warning] Auto-choosing row-wise multi-threading, the overhead of testing was 0.000043 seconds.
|
|
369
|
+
You can set `force_row_wise=true` to remove the overhead.
|
|
370
|
+
And if memory is not enough, you can set `force_col_wise=true`.
|
|
371
|
+
[LightGBM] [Info] Total Bins 532
|
|
372
|
+
[LightGBM] [Info] Number of data points in the train set: 400, number of used features: 4
|
|
373
|
+
[1] valid_0's l2: 0.215811
|
|
374
|
+
[2] valid_0's l2: 0.188138
|
|
375
|
+
[3] valid_0's l2: 0.166146
|
|
376
|
+
...
|
|
377
|
+
...
|
|
378
|
+
[29] valid_0's l2: 0.042255
|
|
379
|
+
[30] valid_0's l2: 0.0416953
|
|
380
|
+
>>> model
|
|
381
|
+
<lightgbm.basic.Booster object at ...>
|
|
382
|
+
|
|
383
|
+
# Example 2: Train the model using LGBMClassifier sklearn object.
|
|
384
|
+
# Create lightgbm sklearn object.
|
|
385
|
+
>>> lgbm_cl = td_lightgbm.LGBMClassifier()
|
|
386
|
+
>>> lgbm_cl
|
|
387
|
+
LGBMClassifier()
|
|
388
|
+
|
|
389
|
+
# Fit/train the model using fit() function.
|
|
390
|
+
>>> lgbm_cl.fit(df_x, df_y)
|
|
391
|
+
LGBMClassifier()
|
|
392
|
+
|
|
393
|
+
# Perform prediction.
|
|
394
|
+
>>> lgbm_cl.predict(df_x).head(3)
|
|
395
|
+
col1 col2 col3 col4 lgbmclassifier_predict_1
|
|
396
|
+
0 1.105026 -1.949894 -1.537164 0.073171 1
|
|
397
|
+
1 1.878349 0.577289 1.795746 2.762539 1
|
|
398
|
+
2 -1.130582 -0.020296 -0.710234 -1.440991 0
|
|
399
|
+
|
|
400
|
+
# Access attributes.
|
|
401
|
+
>>> lgbm_cl.feature_importances_
|
|
402
|
+
array([ 0, 20, 10, 10])
|
|
297
403
|
"""
|
|
298
404
|
|
|
299
405
|
def __init__(self):
|
|
300
406
|
super().__init__()
|
|
301
407
|
self._modules = _LIGHTGBM_MODULES
|
|
302
|
-
self._object_wrapper =
|
|
408
|
+
self._object_wrapper = _LightgbmBoosterWrapper
|
|
303
409
|
self._function_wrapper = _LightgbmFunctionWrapper
|
|
304
410
|
|
|
411
|
+
def _assign_object_wrapper(self, module, class_name):
|
|
412
|
+
"""
|
|
413
|
+
Assigns the appropriate object wrapper based on the module and class name.
|
|
414
|
+
"""
|
|
415
|
+
|
|
416
|
+
if module == "lightgbm.basic" and class_name == "Booster":
|
|
417
|
+
self._object_wrapper = _LightgbmBoosterWrapper
|
|
418
|
+
|
|
419
|
+
if module == "lightgbm.basic" and class_name == "Dataset":
|
|
420
|
+
self._object_wrapper = _LightgbmDatasetWrapper
|
|
421
|
+
|
|
422
|
+
if module == "lightgbm.sklearn":
|
|
423
|
+
self._object_wrapper = _LightgbmSklearnWrapper
|
|
424
|
+
|
|
305
425
|
def __getattr__(self, name):
|
|
306
426
|
|
|
307
427
|
def __get_module(*c, **kwargs):
|
|
@@ -317,23 +437,27 @@ class Lightgbm(_OpenSource):
|
|
|
317
437
|
return self._function_wrapper(module_name=module, func_name=name)(**kwargs)
|
|
318
438
|
|
|
319
439
|
kwargs.update(zip(class_instance.__init__.__code__.co_varnames[1:], c))
|
|
320
|
-
if module == "lightgbm.basic" and name == "Booster":
|
|
321
|
-
return _LightgbmBoosterWrapper(module_name=module, class_name=name, kwargs=kwargs)
|
|
322
|
-
|
|
323
|
-
if module == "lightgbm.sklearn":
|
|
324
|
-
return _LighgbmSklearnWrapper(module_name=module, class_name=name, kwargs=kwargs)
|
|
325
440
|
|
|
326
|
-
|
|
441
|
+
all_args = {"module_name": module, "class_name": name, "kwargs": kwargs}
|
|
442
|
+
self._assign_object_wrapper(module, name)
|
|
443
|
+
|
|
444
|
+
return self._object_wrapper(**all_args)
|
|
327
445
|
|
|
328
446
|
return __get_module
|
|
329
447
|
|
|
330
448
|
def deploy(self, model_name, model, replace_if_exists=False):
|
|
331
|
-
|
|
332
|
-
|
|
449
|
+
# Docstring of the parent class also contains examples for lightgbm.
|
|
450
|
+
module = model.__module__ if hasattr(model, "__module__") else None
|
|
451
|
+
class_name = model.__class__.__name__ if hasattr(model, "__class__") else None
|
|
333
452
|
|
|
334
|
-
|
|
335
|
-
raise
|
|
336
|
-
|
|
453
|
+
if module is None or class_name is None:
|
|
454
|
+
raise ValueError("The model object is not supported for deployment.")
|
|
455
|
+
|
|
456
|
+
self._assign_object_wrapper(module, class_name)
|
|
457
|
+
|
|
458
|
+
return self._object_wrapper._deploy(model_name=model_name,
|
|
459
|
+
model=model,
|
|
460
|
+
replace_if_exists=replace_if_exists)
|
|
337
461
|
|
|
338
462
|
|
|
339
463
|
td_sklearn = Sklearn()
|
|
@@ -13,11 +13,11 @@
|
|
|
13
13
|
#
|
|
14
14
|
# ##################################################################
|
|
15
15
|
|
|
16
|
-
from enum import Enum
|
|
17
|
-
from teradataml import VARCHAR, BLOB
|
|
18
16
|
from dataclasses import dataclass, field
|
|
19
|
-
from
|
|
17
|
+
from enum import Enum
|
|
18
|
+
from typing import Any, Dict, Optional, Tuple
|
|
20
19
|
|
|
20
|
+
from teradataml import BLOB, VARCHAR
|
|
21
21
|
|
|
22
22
|
_SKL_MODULES = ["sklearn.calibration", "sklearn.cluster", "sklearn.compose", "sklearn.covariance",
|
|
23
23
|
"sklearn.decomposition", "sklearn.discriminant_analysis",
|
|
@@ -40,6 +40,8 @@ class OpenSourcePackage(Enum):
|
|
|
40
40
|
return [item.value for item in cls]
|
|
41
41
|
|
|
42
42
|
|
|
43
|
+
_packages_verified_in_vantage = {} # Used to ensure check for python and python packages done only once per package.
|
|
44
|
+
|
|
43
45
|
@dataclass
|
|
44
46
|
class OpensourceModels:
|
|
45
47
|
"""Dataclass for Opensource Models details."""
|
|
@@ -49,6 +51,8 @@ class OpensourceModels:
|
|
|
49
51
|
pos_args: Tuple[Any] = tuple() # Positional arguments used for model creation.
|
|
50
52
|
key_args: Dict[str, Any] = field(default_factory=dict) # Keyword arguments used for model creation.
|
|
51
53
|
fit_partition_columns_non_default: Optional[str] = None # Columns used for partitioning.
|
|
54
|
+
osml_module: Optional[str] = None # Module of corresponding wrapper class.
|
|
55
|
+
osml_class: Optional[str] = None # Corresponding wrapper class name.
|
|
52
56
|
|
|
53
57
|
# Model table details used by opensource BYOM.
|
|
54
58
|
_OSML_MODELS_TABLE_NAME = "opensourceml_models"
|
|
@@ -19,19 +19,20 @@ import json
|
|
|
19
19
|
import os
|
|
20
20
|
import pickle
|
|
21
21
|
import warnings
|
|
22
|
-
|
|
23
22
|
from collections import OrderedDict
|
|
24
23
|
from importlib import import_module
|
|
25
24
|
|
|
26
|
-
|
|
25
|
+
import numpy
|
|
27
26
|
import pandas as pd
|
|
28
27
|
from teradatasqlalchemy import BLOB, CLOB, FLOAT
|
|
29
28
|
|
|
30
|
-
from teradataml import _TDML_DIRECTORY,
|
|
29
|
+
from teradataml import (_TDML_DIRECTORY, MessageCodes, Messages,
|
|
30
|
+
TeradataMlException, UtilFuncs, execute_sql)
|
|
31
|
+
from teradataml.opensource._base import (_FunctionWrapper,
|
|
32
|
+
_OpenSourceObjectWrapper)
|
|
33
|
+
from teradataml.opensource._constants import OpenSourcePackage
|
|
34
|
+
from teradataml.opensource._sklearn import _SkLearnObjectWrapper
|
|
31
35
|
from teradataml.opensource._wrapper_utils import _generate_new_name
|
|
32
|
-
from teradataml.opensource.constants import OpenSourcePackage
|
|
33
|
-
from teradataml.opensource.sklearn._sklearn_wrapper import (
|
|
34
|
-
_FunctionWrapper, _OpenSourceObjectWrapper, _SkLearnObjectWrapper)
|
|
35
36
|
|
|
36
37
|
|
|
37
38
|
class _LightgbmDatasetWrapper(_OpenSourceObjectWrapper):
|
|
@@ -43,6 +44,7 @@ class _LightgbmDatasetWrapper(_OpenSourceObjectWrapper):
|
|
|
43
44
|
|
|
44
45
|
file_type = "file_fn_lightgbm"
|
|
45
46
|
self._template_file = "dataset.template"
|
|
47
|
+
self._pkgs = ["lightgbm", "scikit-learn", "numpy", "scipy"]
|
|
46
48
|
super().__init__(model=model, module_name=module_name, class_name=class_name, kwargs=kwargs)
|
|
47
49
|
|
|
48
50
|
self._scripts_path = os.path.join(_TDML_DIRECTORY, "data", "scripts", "lightgbm")
|
|
@@ -221,12 +223,16 @@ class _LightgbmDatasetWrapper(_OpenSourceObjectWrapper):
|
|
|
221
223
|
|
|
222
224
|
return self
|
|
223
225
|
|
|
226
|
+
def deploy(self, model_name, replace_if_exists=False):
|
|
227
|
+
raise ValueError("lightgbm Dataset object is not the model object that can be trained. "
|
|
228
|
+
"Hence, not deployable.")
|
|
224
229
|
|
|
225
230
|
class _LightgbmFunctionWrapper(_FunctionWrapper):
|
|
226
231
|
OPENSOURCE_PACKAGE_NAME = OpenSourcePackage.LIGHTGBM
|
|
227
232
|
def __init__(self, module_name=None, func_name=None):
|
|
228
233
|
file_type = "file_fn_lightgbm"
|
|
229
234
|
template_file = "lightgbm_function.template"
|
|
235
|
+
self._pkgs = ["lightgbm", "scikit-learn", "numpy", "scipy"]
|
|
230
236
|
self._script_file_name = _generate_new_name(type=file_type, extension="py")
|
|
231
237
|
super().__init__(module_name, func_name, file_type=file_type, template_file=template_file)
|
|
232
238
|
self._scripts_path = os.path.join(_TDML_DIRECTORY, "data", "scripts", "lightgbm")
|
|
@@ -462,10 +468,9 @@ class _LightgbmBoosterWrapper(_SkLearnObjectWrapper):
|
|
|
462
468
|
OPENSOURCE_PACKAGE_NAME = OpenSourcePackage.LIGHTGBM
|
|
463
469
|
def __init__(self, model=None, module_name=None, class_name=None, kwargs=None, model_column_name=None):
|
|
464
470
|
file_type = "file_fn_lightgbm_booster"
|
|
465
|
-
|
|
466
471
|
self._model_column_name = model_column_name
|
|
467
|
-
|
|
468
472
|
self.record_evaluation_result = None
|
|
473
|
+
self._pkgs = ["lightgbm", "scikit-learn", "numpy", "scipy"]
|
|
469
474
|
|
|
470
475
|
if model is not None and isinstance(model, dict) and self._model_column_name in model.keys():
|
|
471
476
|
self.record_evaluation_result = model["record_evaluation_result"]
|
|
@@ -500,10 +505,6 @@ class _LightgbmBoosterWrapper(_SkLearnObjectWrapper):
|
|
|
500
505
|
class_obj = getattr(import_module(self.module_name), self.class_name)
|
|
501
506
|
self.modelObj = class_obj(**self.kwargs)
|
|
502
507
|
|
|
503
|
-
def deploy(self, model_name, replace_if_exists=False):
|
|
504
|
-
raise NotImplementedError("The deploy() function is not yet supported for lightgbm OpensourceML objects. \
|
|
505
|
-
Support will be added in future releases.")
|
|
506
|
-
|
|
507
508
|
@property
|
|
508
509
|
def model_info(self):
|
|
509
510
|
"""
|
|
@@ -662,42 +663,6 @@ class _LightgbmBoosterWrapper(_SkLearnObjectWrapper):
|
|
|
662
663
|
"""
|
|
663
664
|
return self.modelObj
|
|
664
665
|
|
|
665
|
-
def _convert_arguments_to_modelObj(self, args, idx_multi_model=None):
|
|
666
|
-
"""
|
|
667
|
-
Internal function to convert all OpensourceML related objects in arguments to
|
|
668
|
-
underlying model objects.
|
|
669
|
-
"""
|
|
670
|
-
if isinstance(args, dict):
|
|
671
|
-
new_args = args.copy() # To avoid updating
|
|
672
|
-
for k, v in new_args.items():
|
|
673
|
-
if isinstance(v, type(self)) or isinstance(v, _LightgbmDatasetWrapper):
|
|
674
|
-
if idx_multi_model is None:
|
|
675
|
-
# single model. This argument (idx_multi_model) is set only when modelObj
|
|
676
|
-
# is multi model.
|
|
677
|
-
new_args[k] = v.modelObj
|
|
678
|
-
else:
|
|
679
|
-
# multi-model. Get appropriate model from modelObj.
|
|
680
|
-
new_args[k] = v.modelObj.iloc[idx_multi_model][self._model_column_name]
|
|
681
|
-
else:
|
|
682
|
-
new_args[k] = v
|
|
683
|
-
return new_args
|
|
684
|
-
|
|
685
|
-
# If args is tuple, convert all elements to underlying model object.
|
|
686
|
-
elif isinstance(args, tuple):
|
|
687
|
-
new_args = tuple()
|
|
688
|
-
for arg in args:
|
|
689
|
-
if isinstance(arg, type(self)) or isinstance(arg, _LightgbmDatasetWrapper):
|
|
690
|
-
if idx_multi_model is None:
|
|
691
|
-
# single model. This argument is set only when modelObj is single model.
|
|
692
|
-
new_args += (arg.modelObj,)
|
|
693
|
-
else:
|
|
694
|
-
# multi-model. Get appropriate model from modelObj.
|
|
695
|
-
new_args += (arg.modelObj.iloc[idx_multi_model][self._model_column_name],)
|
|
696
|
-
else:
|
|
697
|
-
new_args += (arg,)
|
|
698
|
-
return new_args
|
|
699
|
-
return args
|
|
700
|
-
|
|
701
666
|
def __getattr__(self, name):
|
|
702
667
|
def __run_transform(*c, **kwargs):
|
|
703
668
|
# Lightgbm predict method takes other keyword arguments along with data related arguments.
|
|
@@ -729,6 +694,43 @@ class _LightgbmBoosterWrapper(_SkLearnObjectWrapper):
|
|
|
729
694
|
return __run_transform
|
|
730
695
|
return super().__getattr__(name)
|
|
731
696
|
|
|
697
|
+
def _execute_function_locally(self, ten_row_data, feature_columns, label_columns, openml_obj,
|
|
698
|
+
func_name, **kwargs):
|
|
699
|
+
"""
|
|
700
|
+
Function which overrides the existing _execute_function_locally method to handle ValueError
|
|
701
|
+
as argument names are different in lightgbm compared to scikit-learn.
|
|
702
|
+
"""
|
|
703
|
+
X = numpy.array(ten_row_data)
|
|
704
|
+
|
|
705
|
+
if label_columns:
|
|
706
|
+
n_f = len(feature_columns)
|
|
707
|
+
n_c = len(label_columns)
|
|
708
|
+
y = X[:,n_f : n_f + n_c]
|
|
709
|
+
X = X[:,:n_f]
|
|
710
|
+
# predict() now also takes 'y' (the "label" lightgbm argument) so that it can return the labels
|
|
711
|
+
# from script. Skipping 'y' in local run if passed.
|
|
712
|
+
# Generally, 'y' is passed to return y along with actual output.
|
|
713
|
+
# Since the actual lightgbm predict() does not have a "label" argument and has other arguments like
|
|
714
|
+
# "start_iteration" etc, local run in try block is resulting into ValueError as
|
|
715
|
+
# "ValueError: The truth value of an array with more than one element is ambiguous.
|
|
716
|
+
# Use a.any() or a.all()" for "start_iteration" argument because the value for "y" is
|
|
717
|
+
# taken for "start_iteration" positional argument. Hence, skipping y in local run.
|
|
718
|
+
try:
|
|
719
|
+
trans_opt = getattr(openml_obj, func_name)(X, y, **kwargs)
|
|
720
|
+
except TypeError as _:
|
|
721
|
+
# A function that does not accept 'y', like predict_proba(), raises an error like
|
|
722
|
+
# "predict_proba() takes 2 positional arguments but 3 were given".
|
|
723
|
+
trans_opt = getattr(openml_obj, func_name)(X, **kwargs)
|
|
724
|
+
except ValueError as _:
|
|
725
|
+
trans_opt = getattr(openml_obj, func_name)(X, **kwargs)
|
|
726
|
+
else:
|
|
727
|
+
trans_opt = getattr(openml_obj, func_name)(X, **kwargs)
|
|
728
|
+
|
|
729
|
+
if isinstance(trans_opt, numpy.ndarray) and trans_opt.shape == (X.shape[0],):
|
|
730
|
+
trans_opt = trans_opt.reshape(X.shape[0], 1)
|
|
731
|
+
|
|
732
|
+
return trans_opt
|
|
733
|
+
|
|
732
734
|
def _transform(self, **kwargs):
|
|
733
735
|
# Overwriting existing _transform method to handle data related arguments and other
|
|
734
736
|
# keyword arguments.
|
|
@@ -773,16 +775,13 @@ class _LightgbmBoosterWrapper(_SkLearnObjectWrapper):
|
|
|
773
775
|
return self.modelObj.__repr__()
|
|
774
776
|
|
|
775
777
|
|
|
776
|
-
class
|
|
778
|
+
class _LightgbmSklearnWrapper(_SkLearnObjectWrapper):
|
|
777
779
|
OPENSOURCE_PACKAGE_NAME = OpenSourcePackage.LIGHTGBM
|
|
778
780
|
def __init__(self, model=None, module_name=None, class_name=None, kwargs=None):
|
|
781
|
+
self._pkgs = ["lightgbm", "scikit-learn", "numpy", "scipy"]
|
|
779
782
|
super().__init__(model=model, module_name=module_name, class_name=class_name, kwargs=kwargs)
|
|
780
783
|
self._scripts_path = os.path.join(_TDML_DIRECTORY, "data", "scripts", "lightgbm")
|
|
781
784
|
|
|
782
|
-
def deploy(self, model_name, replace_if_exists=False):
|
|
783
|
-
raise NotImplementedError("The deploy() function is not yet supported for lightgbm OpensourceML objects. \
|
|
784
|
-
Support will be added in future releases.")
|
|
785
|
-
|
|
786
785
|
def set_params(self, **params):
|
|
787
786
|
"""
|
|
788
787
|
Please check the description in Docs/OpensourceML/sklearn.py.
|