teradataml 20.0.0.3__py3-none-any.whl → 20.0.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml has been flagged as potentially problematic; see the package registry advisory for details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +193 -1
- teradataml/__init__.py +2 -1
- teradataml/_version.py +2 -2
- teradataml/analytics/analytic_function_executor.py +25 -18
- teradataml/analytics/byom/__init__.py +1 -1
- teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
- teradataml/analytics/sqle/__init__.py +20 -2
- teradataml/analytics/utils.py +15 -1
- teradataml/analytics/valib.py +18 -4
- teradataml/automl/__init__.py +341 -112
- teradataml/automl/autodataprep/__init__.py +471 -0
- teradataml/automl/data_preparation.py +84 -42
- teradataml/automl/data_transformation.py +69 -33
- teradataml/automl/feature_engineering.py +76 -9
- teradataml/automl/feature_exploration.py +639 -25
- teradataml/automl/model_training.py +35 -14
- teradataml/clients/auth_client.py +2 -2
- teradataml/common/__init__.py +1 -2
- teradataml/common/constants.py +122 -63
- teradataml/common/messagecodes.py +14 -3
- teradataml/common/messages.py +8 -4
- teradataml/common/sqlbundle.py +40 -10
- teradataml/common/utils.py +366 -74
- teradataml/common/warnings.py +11 -0
- teradataml/context/context.py +348 -86
- teradataml/data/amazon_reviews_25.csv +26 -0
- teradataml/data/apriori_example.json +22 -0
- teradataml/data/byom_example.json +11 -0
- teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
- teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
- teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
- teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
- teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
- teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
- teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
- teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
- teradataml/data/docs/sqle/docs_17_20/Shap.py +28 -6
- teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
- teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
- teradataml/data/hnsw_alter_data.csv +5 -0
- teradataml/data/hnsw_data.csv +10 -0
- teradataml/data/jsons/byom/h2opredict.json +1 -1
- teradataml/data/jsons/byom/onnxembeddings.json +266 -0
- teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
- teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
- teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
- teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
- teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
- teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
- teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
- teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
- teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +2 -2
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +3 -3
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +6 -6
- teradataml/data/ner_dict.csv +8 -0
- teradataml/data/ner_input_eng.csv +7 -0
- teradataml/data/ner_rule.csv +5 -0
- teradataml/data/pos_input.csv +40 -0
- teradataml/data/tdnerextractor_example.json +14 -0
- teradataml/data/teradataml_example.json +21 -0
- teradataml/data/textmorph_example.json +5 -0
- teradataml/data/to_num_data.csv +4 -0
- teradataml/data/tochar_data.csv +5 -0
- teradataml/data/trans_dense.csv +16 -0
- teradataml/data/trans_sparse.csv +55 -0
- teradataml/data/vectordistance_example.json +1 -1
- teradataml/dataframe/copy_to.py +45 -29
- teradataml/dataframe/data_transfer.py +72 -46
- teradataml/dataframe/dataframe.py +642 -166
- teradataml/dataframe/dataframe_utils.py +167 -22
- teradataml/dataframe/functions.py +135 -20
- teradataml/dataframe/setop.py +11 -6
- teradataml/dataframe/sql.py +330 -78
- teradataml/dbutils/dbutils.py +556 -140
- teradataml/dbutils/filemgr.py +14 -10
- teradataml/hyperparameter_tuner/optimizer.py +12 -1
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/{sklearn/_sklearn_wrapper.py → _base.py} +168 -1013
- teradataml/opensource/_class.py +141 -17
- teradataml/opensource/{constants.py → _constants.py} +7 -3
- teradataml/opensource/_lightgbm.py +52 -53
- teradataml/opensource/_sklearn.py +1008 -0
- teradataml/opensource/_wrapper_utils.py +5 -5
- teradataml/options/__init__.py +47 -15
- teradataml/options/configure.py +103 -26
- teradataml/options/display.py +13 -2
- teradataml/plot/axis.py +47 -8
- teradataml/plot/figure.py +33 -0
- teradataml/plot/plot.py +63 -13
- teradataml/scriptmgmt/UserEnv.py +307 -40
- teradataml/scriptmgmt/lls_utils.py +428 -145
- teradataml/store/__init__.py +2 -3
- teradataml/store/feature_store/feature_store.py +102 -7
- teradataml/table_operators/Apply.py +48 -19
- teradataml/table_operators/Script.py +23 -2
- teradataml/table_operators/TableOperator.py +3 -1
- teradataml/table_operators/table_operator_util.py +58 -9
- teradataml/utils/dtypes.py +49 -1
- teradataml/utils/internal_buffer.py +38 -0
- teradataml/utils/validators.py +377 -62
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/METADATA +200 -4
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/RECORD +146 -112
- teradataml/data/SQL_Fundamentals.pdf +0 -0
- teradataml/libaed_0_1.dylib +0 -0
- teradataml/libaed_0_1.so +0 -0
- teradataml/opensource/sklearn/__init__.py +0 -0
- teradataml/store/vector_store/__init__.py +0 -1586
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/zip-safe +0 -0
|
@@ -0,0 +1,471 @@
|
|
|
1
|
+
# External libraries
|
|
2
|
+
import pandas as pd
|
|
3
|
+
|
|
4
|
+
# Teradata libraries
|
|
5
|
+
from teradataml import db_drop_table
|
|
6
|
+
from teradataml.common.constants import AutoMLConstants as aml_const
|
|
7
|
+
from teradataml.common.messages import Messages, MessageCodes
|
|
8
|
+
from teradataml.dataframe.dataframe import DataFrame
|
|
9
|
+
from teradataml.dataframe.copy_to import copy_to_sql
|
|
10
|
+
from teradataml.utils.validators import _Validators
|
|
11
|
+
|
|
12
|
+
# AutoML Internal libraries
|
|
13
|
+
from teradataml import AutoML, TeradataMlException
|
|
14
|
+
|
|
15
|
+
class AutoDataPrep(AutoML):
    """
    AutoDataPrep automates the data-preparation phases of the AutoML
    workflow (feature exploration, feature engineering and data
    preparation); its "_phases" list ends at data preparation and its
    "model_list" stays empty, so no model-training phase is configured.
    """

    def __init__(self,
                 task_type = "Default",
                 verbose = 0,
                 **kwargs):
        """
        DESCRIPTION:
            AutoDataPrep simplifies the data preparation process by automating the different aspects of
            data cleaning and transformation, enabling seamless exploration, transformation, and optimization of datasets.

        PARAMETERS:
            task_type:
                Optional Argument.
                Specifies the task type for AutoDataPrep, whether to apply regression OR classification
                on the provided dataset. If user wants AutoDataPrep() to decide the task type automatically,
                then it should be set to "Default".
                Default Value: "Default"
                Permitted Values: "Regression", "Classification", "Default"
                Types: str

            verbose:
                Optional Argument.
                Specifies the detailed execution steps based on verbose level.
                Default Value: 0
                Permitted Values:
                    * 0: prints the progress bar.
                    * 1: prints the execution steps.
                    * 2: prints the intermediate data between the execution of each step.
                Types: int

            **kwargs:
                Specifies the additional arguments for AutoDataPrep. Below
                are the additional arguments:
                    custom_config_file:
                        Optional Argument.
                        Specifies the path of JSON file in case of custom run.
                        Types: str

                    volatile:
                        Optional Argument.
                        Specifies whether to put the interim results of the
                        functions in a volatile table or not. When set to
                        True, results are stored in a volatile table,
                        otherwise not.
                        Default Value: False
                        Types: bool

                    persist:
                        Optional Argument.
                        Specifies whether to persist the interim results of the
                        functions in a table or not. When set to True,
                        results are persisted in a table; otherwise,
                        results are garbage collected at the end of the
                        session.
                        Default Value: False
                        Types: bool

        RETURNS:
            Instance of AutoDataPrep.

        RAISES:
            TeradataMlException, TypeError, ValueError

        EXAMPLES:
            # Notes:
            #     1. Get the connection to Vantage to execute the function.
            #     2. One must import the required functions mentioned in
            #        the example from teradataml.
            #     3. Function raises error if not supported on the Vantage
            #        user is connected to.

            # Load the example data.
            >>> load_example_data("teradataml", "titanic")

            # Create teradataml DataFrames.
            >>> titanic = DataFrame.from_table("titanic")

            # Example 1: Run AutoDataPrep for classification problem.
            # Scenario: Titanic dataset is used to predict the survival of passengers.

            # Create an instance of AutoDataPrep.
            >>> aprep_obj = AutoDataPrep(task_type="Classification", verbose=2)

            # Fit the data.
            >>> aprep_obj.fit(titanic, titanic.survived)

            # Retrieve the data after Auto Data Preparation.
            >>> datas = aprep_obj.get_data()
        """
        # Initialize the underlying AutoML pipeline with the same task
        # type, verbosity and any additional keyword arguments.
        super().__init__(task_type=task_type,
                         verbose=verbose,
                         **kwargs)

        # Setting the attributes for AutoDataPrep.
        # NOTE(review): attributes are assigned via super().__setattr__,
        # presumably to bypass a custom __setattr__ on AutoML — confirm.
        # "_auto_dataprep" flags data-prep-only mode, "model_list" stays
        # empty (no models trained), and "_phases"/"_progressbar_prefix"
        # drive the progress display.
        super().__setattr__("_auto_dataprep", True)
        super().__setattr__("model_list", [])
        super().__setattr__("_phases", ["1. Feature Exploration ->",
                                        "2. Feature Engineering ->",
                                        "3. Data Preparation"])
        super().__setattr__("_progressbar_prefix", 'Auto Data Prep:')
|
|
118
|
+
def fit(self,
        data,
        target_column):
    """
    DESCRIPTION:
        Function to fit the data for Auto Data Preparation.

    PARAMETERS:
        data:
            Required Argument.
            Specifies the input data to be used for Auto Data Preparation.
            Types: DataFrame

        target_column:
            Required Argument.
            Specifies the target column to be used for Auto Data Preparation.
            Types: str

    RETURNS:
        None

    RAISES:
        TeradataMlException, ValueError

    EXAMPLES:
        # Notes:
        #     1. Get the connection to Vantage to execute the function.
        #     2. One must import the required functions mentioned in
        #        the example from teradataml.
        #     3. Function raises error if not supported on the Vantage
        #        user is connected to.

        # Load the example data.
        >>> load_example_data("teradataml", "titanic")

        # Create teradataml DataFrames.
        >>> titanic = DataFrame.from_table("titanic")

        # Example 1: Run AutoDataPrep for classification problem.
        # Scenario: Titanic dataset is used to predict the survival of passengers.

        # Create an instance of AutoDataPrep.
        >>> aprep_obj = AutoDataPrep(task_type="Classification", verbose=2)

        # Fit the data.
        >>> aprep_obj.fit(titanic, titanic.survived)
    """
    # Thin delegation: the whole pipeline is executed by AutoML.fit();
    # the "_auto_dataprep" flag set in __init__ presumably makes the base
    # class stop after data preparation — confirm against AutoML.fit.
    super().fit(data, target_column)
|
170
|
+
def get_data(self):
    """
    DESCRIPTION:
        Function to retrieve the data after Auto Data Preparation.

    RETURNS:
        Dictionary of DataFrames containing the data after Auto Data Preparation.

    RAISES:
        TeradataMlException

    EXAMPLES:
        # Notes:
        #     1. Get the connection to Vantage to execute the function.
        #     2. One must import the required functions mentioned in
        #        the example from teradataml.
        #     3. Function raises error if not supported on the Vantage
        #        user is connected to.

        # Load the example data.
        >>> load_example_data("teradataml", "titanic")

        # Create teradataml DataFrames.
        >>> titanic = DataFrame.from_table("titanic")

        # Example 1: Run AutoDataPrep for classification problem.
        # Scenario: Titanic dataset is used to predict the survival of passengers.

        # Create an instance of AutoDataPrep.
        >>> aprep_obj = AutoDataPrep(task_type="Classification", verbose=2)

        # Fit the data.
        >>> aprep_obj.fit(titanic, titanic.survived)

        # Retrieve the data after Auto Data Preparation.
        >>> datas = aprep_obj.get_data()
    """
    # get_data() is only meaningful once fit() has populated
    # table_name_mapping — raise otherwise.
    _Validators._validate_dependent_method("get_data", "fit", self._is_fit_called)

    # Materialize one teradataml DataFrame per persisted result table,
    # keyed by the feature-selection method name.
    return {method: DataFrame(table_name)
            for method, table_name in self.table_name_mapping.items()}
|
216
|
+
def deploy(self, table_name):
    """
    DESCRIPTION:
        Deploy the AutoDataPrep generated data to the database,
        i.e., saves the data in the database.

    PARAMETERS:
        table_name:
            Required Argument.
            Specifies the name of the table to store the information
            of deployed datasets in the database.
            Types: str

    RETURNS:
        None

    RAISES:
        TeradataMlException, ValueError

    EXAMPLES:
        # Create an instance of the AutoDataPrep.
        # Perform fit() operation on the AutoDataPrep object.
        # Deploy the data to the table.

        >>> from teradataml import AutoDataPrep
        # Load the example data.
        >>> load_example_data("teradataml", "titanic")
        >>> titanic = DataFrame.from_table("titanic")

        # Create an instance of AutoDataPrep.
        >>> aprep_obj = AutoDataPrep(task_type="Classification", verbose=2)

        # Fit the data.
        >>> aprep_obj.fit(titanic, titanic.survived)

        # Deploy the data to the table.
        >>> aprep_obj.deploy("table_name")
    """

    # Appending arguments to list for validation.
    # "table_name" is a required argument, so the optional flag is False
    # (consistent with delete_data(); it was incorrectly True before,
    # which let validation accept a missing table name).
    arg_info_matrix = []
    arg_info_matrix.append(["table_name", table_name, False, (str), True])

    # Validating the arguments.
    _Validators._validate_function_arguments(arg_info_matrix)

    # Raise error if fit is not called before deploy.
    _Validators._validate_dependent_method("deploy", "fit", self._is_fit_called)

    if self.table_name_mapping is not None and \
            isinstance(self.table_name_mapping, dict):

        tab_map = {}
        # If persist is False, interim results live in temporary tables,
        # so copy each one into a permanent table before recording it.
        if not self.kwargs.get("persist", False):
            for key, val in self.table_name_mapping.items():
                # Persist the data.
                per_name = self._create_per_result_table(prefix='{}_'.format(self.target_column),
                                                         persist_result_table=val)
                # Store the table name mapping.
                tab_map[key] = per_name
        else:
            # Tables are already persisted.
            tab_map = self.table_name_mapping

        # One row per feature-selection method -> persisted table name.
        data = pd.DataFrame(list(tab_map.items()), columns=['Feature_Selection_Method', 'Table_Name'])

        # Save the mapping table to the database.
        copy_to_sql(df=data, table_name=table_name, if_exists="replace")
        print("Data deployed successfully to the table: ", table_name)
        return

    # Raise error if data is not found or
    # table_name_mapping is not a dictionary / is None.
    err = Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
                               "'deploy' method",
                               "Data not found to deploy.")
    raise TeradataMlException(err, MessageCodes.EXECUTION_FAILED)
|
294
|
+
def load(self, table_name):
    """
    DESCRIPTION:
        Loads the AutoDataPrep generated data from the database
        in the session to use it for model training or scoring.

    PARAMETERS:
        table_name:
            Required Argument.
            Specifies the name of the table containing the information
            of deployed datasets in the database.
            Types: str

    RETURNS:
        Dictionary of DataFrames containing the datas generated from AutoDataPrep.

    RAISES:
        TeradataMlException, ValueError

    EXAMPLES:
        # Create an instance of the AutoDataPrep.
        # Load the data from the table.

        # Create an instance of AutoDataPrep.
        >>> aprep_obj = AutoDataPrep()

        # Load the data from the table.
        >>> data = aprep_obj.load("table_name")

        # Retrieve the data
        >>> print(data)
    """

    # Appending arguments to list for validation.
    # "table_name" is a required argument, so the optional flag is False
    # (consistent with delete_data(); it was incorrectly True before,
    # which let validation accept a missing table name).
    arg_info_matrix = []
    arg_info_matrix.append(["table_name", table_name, False, (str), True])

    # Validating the arguments.
    _Validators._validate_function_arguments(arg_info_matrix)

    # Load the mapping table written by deploy(): one row per
    # (feature-selection method, result table name).
    load_df = DataFrame(table_name)

    data = {}
    # Wrap each recorded result table in a DataFrame. A table that can no
    # longer be materialized is reported and mapped to None instead of
    # aborting the whole load (deliberate best-effort behavior).
    for mtd, tab_name in load_df.get_values():
        try:
            data[mtd] = DataFrame(tab_name)
        except Exception as e:
            print(f"Error while loading {mtd} table: ", e)
            data[mtd] = None
            continue

    return data
|
350
|
+
def delete_data(self,
                table_name,
                fs_method=None):
    """
    DESCRIPTION:
        Deletes the deployed datasets from the database.

    PARAMETERS:
        table_name:
            Required Argument.
            Specifies the name of the table containing the deployed datasets.
            Types: str

        fs_method:
            Optional Argument.
            Specifies the name of the feature selection method to delete from the
            deployed datasets.
            Default Value: None
            Permitted Values: "lasso", "rfe", "pca"
            Note:
                * If "fs_method" is None, then method deletes all the deployed datasets.
            Types: str or list of str

    RETURNS:
        None

    RAISES:
        TeradataMlException

    EXAMPLES:
        # Create an instance of the AutoDataPrep.
        # Fit the data.
        # Deploy the data to the table.
        # Remove the deployed data from the table.

        # Example 1: Remove the deployed data from the table within the AutoDataPrep object.

        from teradataml import AutoDataPrep
        # Load the example data.
        >>> load_example_data("teradataml", "titanic")
        >>> titanic = DataFrame.from_table("titanic")

        # Create an instance of AutoDataPrep.
        >>> aprep_obj = AutoDataPrep(task_type="Classification", verbose=2)

        # fit the data.
        >>> aprep_obj.fit(titanic, titanic.survived)

        # Deploy the datas to the database.
        >>> aprep_obj.deploy("table_name")

        # Remove lasso deployed data from the table.
        >>> aprep_obj.delete_data("table_name", fs_method="lasso")

        # Example 2: Remove the deployed data from the table using different instance of AutoDataPrep object.
        # Create an instance of AutoDataPrep.
        >>> aprep_obj2 = AutoDataPrep()

        # Remove lasso and pca deployed data from the table.
        >>> aprep_obj2.delete_data("table_name", fs_method=["lasso", "pca"])

    """
    # Appending arguments to list for validation.
    # "table_name" is required (optional flag False); "fs_method" accepts
    # a str or list of str restricted to the known selection methods.
    arg_info_matrix = []
    arg_info_matrix.append(["table_name", table_name, False, (str), True])
    arg_info_matrix.append(["fs_method", fs_method, True, (str, list), True, aml_const.FEATURE_SELECTION_MTDS.value])

    # Validating the arguments.
    _Validators._validate_function_arguments(arg_info_matrix)

    # Load the mapping table written by deploy().
    df = DataFrame(table_name)
    # Get the (method, table name) rows from the loaded DataFrame.
    values = df.get_values()

    if fs_method is None:
        # If fs_method is None, then delete all the tables.
        methods = aml_const.FEATURE_SELECTION_MTDS.value
    elif isinstance(fs_method, str):
        # If fs_method is str, then convert it to list.
        methods = [fs_method]
    else:
        # If fs_method is list, then use it as it is.
        methods = fs_method
    # Convert the methods to lower case for case-insensitive matching.
    methods = [method.lower() for method in methods]

    filtered_data = []
    remaining_data = []
    # Split rows into those to drop (method name contains a requested
    # method as a substring) and those to keep.
    # NOTE(review): substring matching, so e.g. "pca" would also match a
    # method name that merely contains "pca" — confirm intended.
    for row in values:
        if any(cond in row[0] for cond in methods):
            filtered_data.append(row)
        else:
            remaining_data.append(row)

    # Drop the selected result tables; failures are reported and the row
    # is kept in remaining_data so the mapping table stays accurate.
    err_flag = False
    for row in filtered_data:
        tab_name = row[1]
        mtd = row[0]
        try:
            db_drop_table(tab_name)
            print(f"Removed {mtd} table successfully.")
        except Exception as e:
            print(f"Error while removing {mtd} table: ", e)
            remaining_data.append(row)
            err_flag = True
            continue

    if err_flag:
        # Print message if error occurred while removing deployed data.
        print("Error occured while removing deployed data.")

    if len(remaining_data) > 0:
        # Rewrite the mapping table with only the surviving rows.
        rem_data = pd.DataFrame(remaining_data, columns=['Feature_Selection_Method', 'Table_Name'])
        # Save the data to the database.
        copy_to_sql(df= rem_data, table_name=table_name, if_exists="replace")
    elif not err_flag:
        # Drop the whole mapping table if no data is remaining.
        db_drop_table(table_name)
        print("Deployed data removed successfully.")