teradataml 20.0.0.0__py3-none-any.whl → 20.0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +183 -0
- teradataml/__init__.py +6 -3
- teradataml/_version.py +2 -2
- teradataml/analytics/__init__.py +3 -2
- teradataml/analytics/analytic_function_executor.py +275 -40
- teradataml/analytics/analytic_query_generator.py +92 -0
- teradataml/analytics/byom/__init__.py +3 -2
- teradataml/analytics/json_parser/metadata.py +1 -0
- teradataml/analytics/json_parser/utils.py +17 -21
- teradataml/analytics/meta_class.py +40 -1
- teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
- teradataml/analytics/sqle/__init__.py +10 -2
- teradataml/analytics/table_operator/__init__.py +3 -2
- teradataml/analytics/uaf/__init__.py +21 -2
- teradataml/analytics/utils.py +62 -1
- teradataml/analytics/valib.py +1 -1
- teradataml/automl/__init__.py +1553 -319
- teradataml/automl/custom_json_utils.py +139 -61
- teradataml/automl/data_preparation.py +276 -319
- teradataml/automl/data_transformation.py +163 -81
- teradataml/automl/feature_engineering.py +402 -239
- teradataml/automl/feature_exploration.py +9 -2
- teradataml/automl/model_evaluation.py +48 -51
- teradataml/automl/model_training.py +291 -189
- teradataml/catalog/byom.py +8 -8
- teradataml/catalog/model_cataloging_utils.py +1 -1
- teradataml/clients/auth_client.py +133 -0
- teradataml/clients/pkce_client.py +1 -1
- teradataml/common/aed_utils.py +3 -2
- teradataml/common/constants.py +48 -6
- teradataml/common/deprecations.py +13 -7
- teradataml/common/garbagecollector.py +156 -120
- teradataml/common/messagecodes.py +6 -1
- teradataml/common/messages.py +3 -1
- teradataml/common/sqlbundle.py +1 -1
- teradataml/common/utils.py +103 -11
- teradataml/common/wrapper_utils.py +1 -1
- teradataml/context/context.py +121 -31
- teradataml/data/advertising.csv +201 -0
- teradataml/data/bank_marketing.csv +11163 -0
- teradataml/data/bike_sharing.csv +732 -0
- teradataml/data/boston2cols.csv +721 -0
- teradataml/data/breast_cancer.csv +570 -0
- teradataml/data/complaints_test_tokenized.csv +353 -0
- teradataml/data/complaints_tokens_model.csv +348 -0
- teradataml/data/covid_confirm_sd.csv +83 -0
- teradataml/data/customer_segmentation_test.csv +2628 -0
- teradataml/data/customer_segmentation_train.csv +8069 -0
- teradataml/data/dataframe_example.json +10 -0
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +3 -1
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +6 -0
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +5 -1
- teradataml/data/docs/sqle/docs_17_20/ANOVA.py +61 -1
- teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
- teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/FTest.py +105 -26
- teradataml/data/docs/sqle/docs_17_20/GLM.py +162 -1
- teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +5 -3
- teradataml/data/docs/sqle/docs_17_20/KMeans.py +48 -1
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +5 -0
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +6 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
- teradataml/data/docs/sqle/docs_17_20/ROC.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +13 -2
- teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +119 -1
- teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +93 -1
- teradataml/data/docs/sqle/docs_17_20/Shap.py +197 -0
- teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +163 -1
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
- teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
- teradataml/data/docs/sqle/docs_17_20/XGBoost.py +12 -4
- teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +7 -1
- teradataml/data/docs/sqle/docs_17_20/ZTest.py +72 -7
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
- teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
- teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
- teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
- teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
- teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
- teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
- teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
- teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
- teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
- teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
- teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
- teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
- teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
- teradataml/data/dwt2d_dataTable.csv +65 -0
- teradataml/data/dwt_dataTable.csv +8 -0
- teradataml/data/dwt_filterTable.csv +3 -0
- teradataml/data/finance_data4.csv +13 -0
- teradataml/data/glm_example.json +28 -1
- teradataml/data/grocery_transaction.csv +19 -0
- teradataml/data/housing_train_segment.csv +201 -0
- teradataml/data/idwt2d_dataTable.csv +5 -0
- teradataml/data/idwt_dataTable.csv +8 -0
- teradataml/data/idwt_filterTable.csv +3 -0
- teradataml/data/insect2Cols.csv +61 -0
- teradataml/data/interval_data.csv +5 -0
- teradataml/data/jsons/paired_functions.json +14 -0
- teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +99 -27
- teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
- teradataml/data/jsons/sqle/17.20/TD_FTest.json +166 -83
- teradataml/data/jsons/sqle/17.20/TD_GLM.json +90 -14
- teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +48 -5
- teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +5 -3
- teradataml/data/jsons/sqle/17.20/TD_KMeans.json +31 -11
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
- teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +3 -2
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
- teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
- teradataml/data/jsons/sqle/17.20/TD_ROC.json +2 -1
- teradataml/data/jsons/sqle/17.20/TD_SVM.json +16 -16
- teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +19 -1
- teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +168 -15
- teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +50 -1
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
- teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
- teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
- teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +25 -7
- teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +17 -4
- teradataml/data/jsons/sqle/17.20/TD_ZTest.json +157 -80
- teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
- teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
- teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
- teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
- teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
- teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
- teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
- teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
- teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
- teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
- teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
- teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
- teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
- teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
- teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
- teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
- teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
- teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +208 -0
- teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
- teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
- teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
- teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +400 -0
- teradataml/data/kmeans_example.json +5 -0
- teradataml/data/kmeans_table.csv +10 -0
- teradataml/data/load_example_data.py +8 -2
- teradataml/data/naivebayestextclassifier_example.json +1 -1
- teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
- teradataml/data/onehot_encoder_train.csv +4 -0
- teradataml/data/openml_example.json +29 -0
- teradataml/data/peppers.png +0 -0
- teradataml/data/real_values.csv +14 -0
- teradataml/data/sax_example.json +8 -0
- teradataml/data/scale_attributes.csv +3 -0
- teradataml/data/scale_example.json +52 -1
- teradataml/data/scale_input_part_sparse.csv +31 -0
- teradataml/data/scale_input_partitioned.csv +16 -0
- teradataml/data/scale_input_sparse.csv +11 -0
- teradataml/data/scale_parameters.csv +3 -0
- teradataml/data/scripts/deploy_script.py +21 -2
- teradataml/data/scripts/sklearn/sklearn_fit.py +40 -37
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +22 -30
- teradataml/data/scripts/sklearn/sklearn_function.template +42 -24
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +23 -33
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +19 -28
- teradataml/data/scripts/sklearn/sklearn_score.py +32 -32
- teradataml/data/scripts/sklearn/sklearn_transform.py +85 -42
- teradataml/data/star_pivot.csv +8 -0
- teradataml/data/templates/open_source_ml.json +2 -1
- teradataml/data/teradataml_example.json +97 -1
- teradataml/data/timestamp_data.csv +4 -0
- teradataml/data/titanic_dataset_unpivoted.csv +19 -0
- teradataml/data/uaf_example.json +55 -1
- teradataml/data/unpivot_example.json +15 -0
- teradataml/data/url_data.csv +9 -0
- teradataml/data/windowdfft.csv +16 -0
- teradataml/data/ztest_example.json +16 -0
- teradataml/dataframe/copy_to.py +9 -4
- teradataml/dataframe/data_transfer.py +125 -64
- teradataml/dataframe/dataframe.py +575 -57
- teradataml/dataframe/dataframe_utils.py +47 -9
- teradataml/dataframe/fastload.py +273 -90
- teradataml/dataframe/functions.py +339 -0
- teradataml/dataframe/row.py +160 -0
- teradataml/dataframe/setop.py +2 -2
- teradataml/dataframe/sql.py +740 -18
- teradataml/dataframe/window.py +1 -1
- teradataml/dbutils/dbutils.py +324 -18
- teradataml/geospatial/geodataframe.py +1 -1
- teradataml/geospatial/geodataframecolumn.py +1 -1
- teradataml/hyperparameter_tuner/optimizer.py +13 -13
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/sklearn/_sklearn_wrapper.py +254 -122
- teradataml/options/__init__.py +16 -5
- teradataml/options/configure.py +39 -6
- teradataml/options/display.py +2 -2
- teradataml/plot/axis.py +4 -4
- teradataml/scriptmgmt/UserEnv.py +26 -19
- teradataml/scriptmgmt/lls_utils.py +120 -16
- teradataml/table_operators/Script.py +4 -5
- teradataml/table_operators/TableOperator.py +160 -26
- teradataml/table_operators/table_operator_util.py +88 -41
- teradataml/table_operators/templates/dataframe_udf.template +63 -0
- teradataml/telemetry_utils/__init__.py +0 -0
- teradataml/telemetry_utils/queryband.py +52 -0
- teradataml/utils/validators.py +41 -3
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/METADATA +191 -6
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/RECORD +263 -185
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/zip-safe +0 -0
|
@@ -21,6 +21,7 @@ from teradataml import ColumnSummary, CategoricalSummary, GetFutileColumns
|
|
|
21
21
|
from teradataml import OutlierFilterFit, OutlierFilterTransform
|
|
22
22
|
from teradataml.hyperparameter_tuner.utils import _ProgressBar
|
|
23
23
|
from teradataml.common.messages import Messages, MessageCodes
|
|
24
|
+
from teradataml import display as dp
|
|
24
25
|
|
|
25
26
|
def _is_terminal():
|
|
26
27
|
"""
|
|
@@ -158,13 +159,14 @@ class _FeatureExplore:
|
|
|
158
159
|
Internal function displays the column summary of categorical column such as
|
|
159
160
|
datatype, null count, non null count, zero count.
|
|
160
161
|
"""
|
|
162
|
+
dp.max_rows = self.data.shape[1]
|
|
161
163
|
# Column Summary of all columns of dataset
|
|
162
164
|
obj = ColumnSummary(data=self.data,
|
|
163
|
-
target_columns=self.data.columns
|
|
164
|
-
volatile=True)
|
|
165
|
+
target_columns=self.data.columns)
|
|
165
166
|
self._display_msg(msg='\nColumn Summary:',
|
|
166
167
|
data=obj.result,
|
|
167
168
|
show_data=True)
|
|
169
|
+
dp.max_rows = 10
|
|
168
170
|
|
|
169
171
|
def _categorical_summary(self,
|
|
170
172
|
categorical_columns=None):
|
|
@@ -503,6 +505,11 @@ class _FeatureExplore:
|
|
|
503
505
|
progress_bar.update(msg=msg, data=col_lst if col_lst else data if data is not None else None,
|
|
504
506
|
progress=False,
|
|
505
507
|
ipython=not self.terminal_print)
|
|
508
|
+
# Displaying shape of data
|
|
509
|
+
if data is not None:
|
|
510
|
+
progress_bar.update(msg=f'{data.shape[0]} rows X {data.shape[1]} columns',
|
|
511
|
+
progress=False,
|
|
512
|
+
ipython=not self.terminal_print)
|
|
506
513
|
# If an inline message is provided instead
|
|
507
514
|
elif inline_msg:
|
|
508
515
|
# Update the progress bar with the inline message
|
|
@@ -15,9 +15,11 @@
|
|
|
15
15
|
|
|
16
16
|
# Python libraries
|
|
17
17
|
import time
|
|
18
|
+
import ast
|
|
18
19
|
|
|
19
20
|
# Teradata libraries
|
|
20
21
|
from teradataml.dataframe.dataframe import DataFrame
|
|
22
|
+
from teradataml.automl.model_training import _ModelTraining
|
|
21
23
|
|
|
22
24
|
|
|
23
25
|
class _ModelEvaluator:
|
|
@@ -38,12 +40,12 @@ class _ModelEvaluator:
|
|
|
38
40
|
Types: teradataml Dataframe
|
|
39
41
|
|
|
40
42
|
target_column:
|
|
41
|
-
Required
|
|
43
|
+
Required Argument.
|
|
42
44
|
Specifies the target column present inside the dataset.
|
|
43
45
|
Types: str
|
|
44
46
|
|
|
45
47
|
task_type:
|
|
46
|
-
Required
|
|
48
|
+
Required Argument.
|
|
47
49
|
Specifies the task type for AutoML, whether to apply regression OR classification
|
|
48
50
|
on the provided dataset.
|
|
49
51
|
Default Value: "Regression"
|
|
@@ -55,7 +57,12 @@ class _ModelEvaluator:
|
|
|
55
57
|
self.target_column = target_column
|
|
56
58
|
self.task_type = task_type
|
|
57
59
|
|
|
58
|
-
def model_evaluation(self,
|
|
60
|
+
def model_evaluation(self,
|
|
61
|
+
rank,
|
|
62
|
+
table_name_mapping,
|
|
63
|
+
data_node_id,
|
|
64
|
+
target_column_ind = True,
|
|
65
|
+
get_metrics = False):
|
|
59
66
|
"""
|
|
60
67
|
DESCRIPTION:
|
|
61
68
|
Function performs the model evaluation on the specified rank in leaderboard.
|
|
@@ -71,25 +78,32 @@ class _ModelEvaluator:
|
|
|
71
78
|
Specifies the mapping of train,test table names.
|
|
72
79
|
Types: dict
|
|
73
80
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
Specifies
|
|
77
|
-
|
|
78
|
-
Types: bool
|
|
81
|
+
data_node_id:
|
|
82
|
+
Required Argument.
|
|
83
|
+
Specifies the test data node id.
|
|
84
|
+
Types: str
|
|
79
85
|
|
|
80
86
|
target_column_ind:
|
|
81
87
|
Optional Argument.
|
|
82
88
|
Specifies whether target column is present in the dataset or not.
|
|
89
|
+
Default Value: True
|
|
90
|
+
Types: bool
|
|
91
|
+
|
|
92
|
+
get_metrics:
|
|
93
|
+
Optional Argument.
|
|
94
|
+
Specifies whether to return metrics or not.
|
|
83
95
|
Default Value: False
|
|
96
|
+
Types: bool
|
|
84
97
|
|
|
85
98
|
RETURNS:
|
|
86
99
|
tuple containing performance metrics and predictions of specified rank ML model.
|
|
87
100
|
|
|
88
101
|
"""
|
|
89
|
-
# Setting
|
|
90
|
-
self.test_data_ind = test_data_ind
|
|
102
|
+
# Setting target column indicator
|
|
91
103
|
self.target_column_ind = target_column_ind
|
|
92
104
|
self.table_name_mapping = table_name_mapping
|
|
105
|
+
self.data_node_id = data_node_id
|
|
106
|
+
self.get_metrics = get_metrics
|
|
93
107
|
|
|
94
108
|
# Return predictions only if test data is present and target column is not present
|
|
95
109
|
return self._evaluator(rank)
|
|
@@ -113,51 +127,34 @@ class _ModelEvaluator:
|
|
|
113
127
|
"""
|
|
114
128
|
# Extracting model using rank
|
|
115
129
|
model = self.model_info.loc[rank]
|
|
130
|
+
|
|
131
|
+
ml_name = self.model_info.loc[rank]['MODEL_ID'].split('_')[0]
|
|
116
132
|
|
|
117
133
|
# Defining eval_params
|
|
118
|
-
eval_params =
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
# eval_params for Classification
|
|
122
|
-
if self.task_type != "Regression":
|
|
123
|
-
# XGboost
|
|
124
|
-
if model['Name'] == 'xgboost':
|
|
125
|
-
eval_params['model_type'] = 'Classification'
|
|
126
|
-
eval_params['object_order_column'] = ['task_index', 'tree_num', 'iter','class_num', 'tree_order']
|
|
127
|
-
else:
|
|
128
|
-
# DF,KNN,SVM,GLM
|
|
129
|
-
eval_params['output_prob'] = True
|
|
130
|
-
else:
|
|
131
|
-
# eval_params for Regression in XGboost
|
|
132
|
-
if model['Name'] == 'xgboost':
|
|
133
|
-
eval_params['model_type'] = 'Regression'
|
|
134
|
-
eval_params['object_order_column'] = ['task_index', 'tree_num', 'iter', 'tree_order']
|
|
135
|
-
|
|
134
|
+
eval_params = _ModelTraining._eval_params_generation(ml_name,
|
|
135
|
+
self.target_column,
|
|
136
|
+
self.task_type)
|
|
136
137
|
|
|
137
|
-
#
|
|
138
|
-
test = DataFrame(self.table_name_mapping['{}
|
|
138
|
+
# Extracting test data for evaluation based on data node id
|
|
139
|
+
test = DataFrame(self.table_name_mapping[self.data_node_id]['{}_new_test'.format(model['FEATURE_SELECTION'])])
|
|
139
140
|
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
test = DataFrame(self.table_name_mapping['{}_test'.format(model['Feature selection'])])
|
|
144
|
-
else:
|
|
145
|
-
test = DataFrame(self.table_name_mapping['{}_new_test'.format(model['Feature selection'])])
|
|
146
|
-
|
|
147
|
-
print(model['Name'], model['Feature selection'])
|
|
141
|
+
print("\nFollowing model is being picked for evaluation:")
|
|
142
|
+
print("Model ID :", model['MODEL_ID'],
|
|
143
|
+
"\nFeature Selection Method :",model['FEATURE_SELECTION'])
|
|
148
144
|
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
145
|
+
if self.task_type.lower() == 'classification':
|
|
146
|
+
params = ast.literal_eval(model['PARAMETERS'])
|
|
147
|
+
eval_params['output_responses'] = params['output_responses']
|
|
148
|
+
|
|
149
|
+
# Mapping data according to model type
|
|
150
|
+
data_map = 'test_data' if ml_name == 'KNN' else 'newdata'
|
|
151
|
+
# Performing evaluation if get_metrics is True else returning predictions
|
|
152
|
+
if self.get_metrics:
|
|
153
|
+
metrics = model['model-obj'].evaluate(**{data_map: test}, **eval_params)
|
|
154
|
+
return metrics
|
|
153
155
|
else:
|
|
154
|
-
#
|
|
155
|
-
if
|
|
156
|
+
# Removing accumulate parameter if target column is not present
|
|
157
|
+
if not self.target_column_ind:
|
|
156
158
|
eval_params.pop("accumulate")
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
# Return both metrics and predictions for all other cases
|
|
160
|
-
metrics = model['model-obj'].evaluate(newdata=test, **eval_params)
|
|
161
|
-
pred = model['model-obj'].predict(newdata=test, **eval_params)
|
|
162
|
-
|
|
163
|
-
return (metrics, pred)
|
|
159
|
+
pred = model['model-obj'].predict(**{data_map: test}, **eval_params)
|
|
160
|
+
return pred
|