teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +315 -2
- teradataml/__init__.py +4 -0
- teradataml/_version.py +1 -1
- teradataml/analytics/analytic_function_executor.py +95 -8
- teradataml/analytics/byom/__init__.py +1 -1
- teradataml/analytics/json_parser/metadata.py +12 -3
- teradataml/analytics/json_parser/utils.py +7 -2
- teradataml/analytics/sqle/__init__.py +5 -1
- teradataml/analytics/table_operator/__init__.py +1 -1
- teradataml/analytics/uaf/__init__.py +1 -1
- teradataml/analytics/utils.py +4 -0
- teradataml/analytics/valib.py +18 -4
- teradataml/automl/__init__.py +51 -6
- teradataml/automl/data_preparation.py +59 -35
- teradataml/automl/data_transformation.py +58 -33
- teradataml/automl/feature_engineering.py +27 -12
- teradataml/automl/model_training.py +73 -46
- teradataml/common/constants.py +88 -29
- teradataml/common/garbagecollector.py +2 -1
- teradataml/common/messagecodes.py +19 -3
- teradataml/common/messages.py +6 -1
- teradataml/common/sqlbundle.py +64 -12
- teradataml/common/utils.py +246 -47
- teradataml/common/warnings.py +11 -0
- teradataml/context/context.py +161 -27
- teradataml/data/amazon_reviews_25.csv +26 -0
- teradataml/data/byom_example.json +11 -0
- teradataml/data/dataframe_example.json +18 -2
- teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
- teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
- teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
- teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
- teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
- teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/Shap.py +34 -6
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
- teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
- teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
- teradataml/data/hnsw_alter_data.csv +5 -0
- teradataml/data/hnsw_data.csv +10 -0
- teradataml/data/jsons/byom/h2opredict.json +1 -1
- teradataml/data/jsons/byom/onnxembeddings.json +266 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
- teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
- teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
- teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
- teradataml/data/medical_readings.csv +101 -0
- teradataml/data/patient_profile.csv +101 -0
- teradataml/data/scripts/lightgbm/dataset.template +157 -0
- teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
- teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
- teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
- teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
- teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
- teradataml/data/target_udt_data.csv +8 -0
- teradataml/data/templates/open_source_ml.json +3 -2
- teradataml/data/teradataml_example.json +8 -0
- teradataml/data/vectordistance_example.json +4 -0
- teradataml/dataframe/copy_to.py +8 -3
- teradataml/dataframe/data_transfer.py +11 -1
- teradataml/dataframe/dataframe.py +1049 -285
- teradataml/dataframe/dataframe_utils.py +152 -20
- teradataml/dataframe/functions.py +578 -35
- teradataml/dataframe/setop.py +11 -6
- teradataml/dataframe/sql.py +185 -16
- teradataml/dbutils/dbutils.py +1049 -115
- teradataml/dbutils/filemgr.py +48 -1
- teradataml/hyperparameter_tuner/optimizer.py +12 -1
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/__init__.py +1 -1
- teradataml/opensource/_base.py +1466 -0
- teradataml/opensource/_class.py +464 -0
- teradataml/opensource/{sklearn/constants.py → _constants.py} +21 -14
- teradataml/opensource/_lightgbm.py +949 -0
- teradataml/opensource/_sklearn.py +1008 -0
- teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +5 -6
- teradataml/options/__init__.py +54 -38
- teradataml/options/configure.py +131 -27
- teradataml/options/display.py +13 -2
- teradataml/plot/axis.py +47 -8
- teradataml/plot/figure.py +33 -0
- teradataml/plot/plot.py +63 -13
- teradataml/scriptmgmt/UserEnv.py +5 -5
- teradataml/scriptmgmt/lls_utils.py +130 -40
- teradataml/store/__init__.py +12 -0
- teradataml/store/feature_store/__init__.py +0 -0
- teradataml/store/feature_store/constants.py +291 -0
- teradataml/store/feature_store/feature_store.py +2318 -0
- teradataml/store/feature_store/models.py +1505 -0
- teradataml/table_operators/Apply.py +32 -18
- teradataml/table_operators/Script.py +3 -1
- teradataml/table_operators/TableOperator.py +3 -1
- teradataml/table_operators/query_generator.py +3 -0
- teradataml/table_operators/table_operator_query_generator.py +3 -1
- teradataml/table_operators/table_operator_util.py +37 -38
- teradataml/table_operators/templates/dataframe_register.template +69 -0
- teradataml/utils/dtypes.py +51 -2
- teradataml/utils/internal_buffer.py +18 -0
- teradataml/utils/validators.py +99 -8
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/METADATA +321 -5
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/RECORD +121 -94
- teradataml/libaed_0_1.dylib +0 -0
- teradataml/libaed_0_1.so +0 -0
- teradataml/opensource/sklearn/__init__.py +0 -1
- teradataml/opensource/sklearn/_class.py +0 -255
- teradataml/opensource/sklearn/_sklearn_wrapper.py +0 -1800
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/zip-safe +0 -0
|
@@ -1,255 +0,0 @@
|
|
|
1
|
-
# ##################################################################
|
|
2
|
-
#
|
|
3
|
-
# Copyright 2023 Teradata. All rights reserved.
|
|
4
|
-
# TERADATA CONFIDENTIAL AND TRADE SECRET
|
|
5
|
-
#
|
|
6
|
-
# Primary Owner: Adithya Avvaru (adithya.avvaru@teradata.com)
|
|
7
|
-
# Secondary Owner: Pankaj Purandare (pankajvinod.purandare@teradata.com)
|
|
8
|
-
#
|
|
9
|
-
# Version: 1.0
|
|
10
|
-
# Function Version: 1.0
|
|
11
|
-
#
|
|
12
|
-
# This file contains classes for Opensource packages like sklearn,
|
|
13
|
-
# lightgbm etc and their corresponding objects.
|
|
14
|
-
#
|
|
15
|
-
# ##################################################################
|
|
16
|
-
|
|
17
|
-
from importlib import import_module
|
|
18
|
-
from teradataml.opensource.sklearn._sklearn_wrapper import _SkLearnObjectWrapper, _SKLearnFunctionWrapper
|
|
19
|
-
from teradataml.opensource.sklearn.constants import _MODULES
|
|
20
|
-
from teradataml.options.configure import configure
|
|
21
|
-
|
|
22
|
-
sklearn_functions = ["k_means"]
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
class _OpenSource:
|
|
26
|
-
"""
|
|
27
|
-
A class to extend teradataml to other open source packages like scikit-learn,
|
|
28
|
-
spark, pytorch, snowflake etc.
|
|
29
|
-
"""
|
|
30
|
-
|
|
31
|
-
def __init__(self):
|
|
32
|
-
self._modules = None
|
|
33
|
-
self._object_wrapper = None
|
|
34
|
-
self._function_wrapper = None
|
|
35
|
-
|
|
36
|
-
def __getattr__(self, name):
|
|
37
|
-
|
|
38
|
-
def __get_module(*c, **kwargs):
|
|
39
|
-
class_instance = None
|
|
40
|
-
module = None
|
|
41
|
-
for module in self._modules:
|
|
42
|
-
lib = import_module(module)
|
|
43
|
-
try:
|
|
44
|
-
class_instance = getattr(lib, name)
|
|
45
|
-
break
|
|
46
|
-
except AttributeError as ex:
|
|
47
|
-
continue
|
|
48
|
-
|
|
49
|
-
if not class_instance:
|
|
50
|
-
raise ValueError(f"The class/function '{name}' does not exist in 'sklearn' modules.")
|
|
51
|
-
|
|
52
|
-
# If the attribute is a function, then return the function object.
|
|
53
|
-
if type(class_instance).__name__ == "function":
|
|
54
|
-
return self._function_wrapper(module_name=module, func_name=name)(*c, **kwargs)
|
|
55
|
-
|
|
56
|
-
return self._object_wrapper(module_name=module, class_name=name,
|
|
57
|
-
pos_args=c, kwargs=kwargs)
|
|
58
|
-
|
|
59
|
-
return __get_module
|
|
60
|
-
|
|
61
|
-
def deploy(self, model_name, model, replace_if_exists=False):
|
|
62
|
-
"""
|
|
63
|
-
DESCRIPTION:
|
|
64
|
-
Deploys the model to Vantage.
|
|
65
|
-
|
|
66
|
-
PARAMETERS:
|
|
67
|
-
model_name:
|
|
68
|
-
Required Argument.
|
|
69
|
-
Specifies the unique name of the model to be deployed.
|
|
70
|
-
Types: str
|
|
71
|
-
|
|
72
|
-
model:
|
|
73
|
-
Required Argument.
|
|
74
|
-
Specifies the teradataml supported opensource model object that is to be deployed.
|
|
75
|
-
Currently supported models are:
|
|
76
|
-
- sklearn
|
|
77
|
-
Types: object
|
|
78
|
-
|
|
79
|
-
replace_if_exists:
|
|
80
|
-
Optional Argument.
|
|
81
|
-
Specifies whether to replace the model if a model with the same name already
|
|
82
|
-
exists in Vantage. If this argument is set to False and a model with the same
|
|
83
|
-
name already exists, then the function raises an exception.
|
|
84
|
-
Default Value: False
|
|
85
|
-
Types: bool
|
|
86
|
-
|
|
87
|
-
RETURNS:
|
|
88
|
-
The opensource object wrapper.
|
|
89
|
-
|
|
90
|
-
RAISES:
|
|
91
|
-
TeradataMlException if model with "model_name" already exists and the argument
|
|
92
|
-
"replace_if_exists" is set to False.
|
|
93
|
-
|
|
94
|
-
EXAMPLES:
|
|
95
|
-
>>> from teradataml import td_sklearn
|
|
96
|
-
>>> from sklearn.linear_model import LinearRegression
|
|
97
|
-
>>> model = LinearRegression(normalize=True)
|
|
98
|
-
|
|
99
|
-
# Example 1: Deploy the model to Vantage.
|
|
100
|
-
>>> lin_reg = td_sklearn.deploy("linreg_model_ver_1", model)
|
|
101
|
-
Model is saved.
|
|
102
|
-
>>> lin_reg
|
|
103
|
-
LinearRegression(normalize=True)
|
|
104
|
-
|
|
105
|
-
# Example 2: Deploy the model to Vantage with the same name as a model that
|
|
106
|
-
# already existed in Vantage.
|
|
107
|
-
>>> lin_reg = td_sklearn.deploy("linreg_model_ver_1", model, replace_if_exists=True)
|
|
108
|
-
Model is deleted.
|
|
109
|
-
Model is saved.
|
|
110
|
-
>>> lin_reg
|
|
111
|
-
LinearRegression(normalize=True)
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
"""
|
|
115
|
-
return self._object_wrapper._deploy(model_name=model_name,
|
|
116
|
-
model=model,
|
|
117
|
-
replace_if_exists=replace_if_exists)
|
|
118
|
-
|
|
119
|
-
def load(self, model_name):
|
|
120
|
-
"""
|
|
121
|
-
DESCRIPTION:
|
|
122
|
-
Loads the model from Vantage based on the interface object on which this function
|
|
123
|
-
is called.
|
|
124
|
-
For example, if the model in "model_name" argument is a statsmodels model, then this
|
|
125
|
-
function raises an exception if the interface object is `td_sklearn`.
|
|
126
|
-
|
|
127
|
-
PARAMETERS:
|
|
128
|
-
model_name:
|
|
129
|
-
Required Argument.
|
|
130
|
-
Specifies the name of the model to be loaded.
|
|
131
|
-
Types: str
|
|
132
|
-
|
|
133
|
-
RETURNS:
|
|
134
|
-
The opensource object wrapper.
|
|
135
|
-
|
|
136
|
-
RAISES:
|
|
137
|
-
TeradataMlException if model with name "model_name" does not exist.
|
|
138
|
-
|
|
139
|
-
EXAMPLE:
|
|
140
|
-
>>> from teradataml import td_sklearn
|
|
141
|
-
>>> # Load the model saved in Vantage. Note that the model is saved using
|
|
142
|
-
>>> # `deploy()` of exposed interface object (like `td_sklearn`) or
|
|
143
|
-
>>> # `_OpenSourceObjectWrapper` Object.
|
|
144
|
-
>>> model = td_sklearn.load("linreg_model_ver_1")
|
|
145
|
-
>>> model
|
|
146
|
-
LinearRegression(normalize=True)
|
|
147
|
-
"""
|
|
148
|
-
return self._object_wrapper._load(model_name)
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
class Sklearn(_OpenSource):
|
|
152
|
-
"""
|
|
153
|
-
DESCRIPTION:
|
|
154
|
-
Interface object to access exposed classes and functions of scikit-learn
|
|
155
|
-
opensource package. All the classes and functions can be run and attributes
|
|
156
|
-
can be accessed using the object created by "td_sklearn" interface object.
|
|
157
|
-
Refer to the Teradata Python Package User Guide for more information about OpenML
|
|
158
|
-
and exposed interface objects.
|
|
159
|
-
|
|
160
|
-
PARAMETERS:
|
|
161
|
-
None
|
|
162
|
-
|
|
163
|
-
RETURNS:
|
|
164
|
-
None
|
|
165
|
-
|
|
166
|
-
EXAMPLES:
|
|
167
|
-
# Load example data.
|
|
168
|
-
>>> load_example_data("openml", ["test_classification", "test_prediction"])
|
|
169
|
-
>>> df = DataFrame("test_classification")
|
|
170
|
-
>>> df.head(3)
|
|
171
|
-
col2 col3 col4 label
|
|
172
|
-
col1
|
|
173
|
-
-2.560430 0.402232 -1.100742 -2.959588 0
|
|
174
|
-
-3.587546 0.291819 -1.850169 -4.331055 0
|
|
175
|
-
-3.697436 1.576888 -0.461220 -3.598652 0
|
|
176
|
-
|
|
177
|
-
>>> df_test = DataFrame("test_prediction")
|
|
178
|
-
>>> df_test.head(3)
|
|
179
|
-
col2 col3 col4
|
|
180
|
-
col1
|
|
181
|
-
-2.560430 0.402232 -1.100742 -2.959588
|
|
182
|
-
-3.587546 0.291819 -1.850169 -4.331055
|
|
183
|
-
-3.697436 1.576888 -0.461220 -3.598652
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
# Get the feature and label data.
|
|
187
|
-
>>> df_x_clasif = df.select(df.columns[:-1])
|
|
188
|
-
>>> df_y_clasif = df.select(df.columns[-1])
|
|
189
|
-
|
|
190
|
-
>>> from teradataml import td_sklearn
|
|
191
|
-
>>> dt_cl = td_sklearn.DecisionTreeClassifier(random_state=0)
|
|
192
|
-
>>> dt_cl
|
|
193
|
-
DecisionTreeClassifier(random_state=0)
|
|
194
|
-
|
|
195
|
-
# Set the parameters.
|
|
196
|
-
>>> dt_cl.set_params(random_state=2, max_features="sqrt")
|
|
197
|
-
DecisionTreeClassifier(max_features='sqrt', random_state=2)
|
|
198
|
-
|
|
199
|
-
# Get the parameters.
|
|
200
|
-
>>> dt_cl.get_params()
|
|
201
|
-
{'ccp_alpha': 0.0,
|
|
202
|
-
'class_weight': None,
|
|
203
|
-
'criterion': 'gini',
|
|
204
|
-
'max_depth': None,
|
|
205
|
-
'max_features': 'sqrt',
|
|
206
|
-
'max_leaf_nodes': None,
|
|
207
|
-
'min_impurity_decrease': 0.0,
|
|
208
|
-
'min_impurity_split': None,
|
|
209
|
-
'min_samples_leaf': 1,
|
|
210
|
-
'min_samples_split': 2,
|
|
211
|
-
'min_weight_fraction_leaf': 0.0,
|
|
212
|
-
'random_state': 2,
|
|
213
|
-
'splitter': 'best'}
|
|
214
|
-
|
|
215
|
-
# Train the model using fit().
|
|
216
|
-
>>> dt_cl.fit(df_x_clasif, df_y_clasif)
|
|
217
|
-
DecisionTreeClassifier(max_features='sqrt', random_state=2)
|
|
218
|
-
|
|
219
|
-
# Perform prediction.
|
|
220
|
-
>>> dt_cl.predict(df_test)
|
|
221
|
-
col1 col2 col3 col4 decisiontreeclassifier_predict_1
|
|
222
|
-
0 1.105026 -1.949894 -1.537164 0.073171 1
|
|
223
|
-
1 1.878349 0.577289 1.795746 2.762539 1
|
|
224
|
-
2 -1.130582 -0.020296 -0.710234 -1.440991 0
|
|
225
|
-
3 -1.243781 0.280821 -0.437933 -1.379770 0
|
|
226
|
-
4 -0.509793 0.492659 0.248207 -0.309591 1
|
|
227
|
-
5 -0.345538 -2.296723 -2.811807 -1.993113 0
|
|
228
|
-
6 0.709217 -1.481740 -1.247431 -0.109140 0
|
|
229
|
-
7 -1.621842 1.713381 0.955084 -0.885921 1
|
|
230
|
-
8 2.425481 -0.549892 0.851440 2.689135 1
|
|
231
|
-
9 1.780375 -1.749949 -0.900142 1.061262 0
|
|
232
|
-
|
|
233
|
-
# Perform scoring.
|
|
234
|
-
>>> dt_cl.score(df_x_clasif, df_y_clasif)
|
|
235
|
-
score
|
|
236
|
-
0 1.0
|
|
237
|
-
|
|
238
|
-
# Access a few attributes.
|
|
239
|
-
>>> dt_cl.classes_
|
|
240
|
-
array([0., 1.])
|
|
241
|
-
|
|
242
|
-
>>> dt_cl.feature_importances_
|
|
243
|
-
array([0.06945187, 0.02 , 0.67786339, 0.23268474])
|
|
244
|
-
|
|
245
|
-
>>> dt_cl.max_features_
|
|
246
|
-
2
|
|
247
|
-
"""
|
|
248
|
-
def __init__(self):
|
|
249
|
-
super().__init__()
|
|
250
|
-
self._modules = _MODULES
|
|
251
|
-
self._object_wrapper = _SkLearnObjectWrapper
|
|
252
|
-
self._function_wrapper = _SKLearnFunctionWrapper
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
td_sklearn = Sklearn()
|