teradataml 20.0.0.0__py3-none-any.whl → 20.0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +183 -0
- teradataml/__init__.py +6 -3
- teradataml/_version.py +2 -2
- teradataml/analytics/__init__.py +3 -2
- teradataml/analytics/analytic_function_executor.py +275 -40
- teradataml/analytics/analytic_query_generator.py +92 -0
- teradataml/analytics/byom/__init__.py +3 -2
- teradataml/analytics/json_parser/metadata.py +1 -0
- teradataml/analytics/json_parser/utils.py +17 -21
- teradataml/analytics/meta_class.py +40 -1
- teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
- teradataml/analytics/sqle/__init__.py +10 -2
- teradataml/analytics/table_operator/__init__.py +3 -2
- teradataml/analytics/uaf/__init__.py +21 -2
- teradataml/analytics/utils.py +62 -1
- teradataml/analytics/valib.py +1 -1
- teradataml/automl/__init__.py +1553 -319
- teradataml/automl/custom_json_utils.py +139 -61
- teradataml/automl/data_preparation.py +276 -319
- teradataml/automl/data_transformation.py +163 -81
- teradataml/automl/feature_engineering.py +402 -239
- teradataml/automl/feature_exploration.py +9 -2
- teradataml/automl/model_evaluation.py +48 -51
- teradataml/automl/model_training.py +291 -189
- teradataml/catalog/byom.py +8 -8
- teradataml/catalog/model_cataloging_utils.py +1 -1
- teradataml/clients/auth_client.py +133 -0
- teradataml/clients/pkce_client.py +1 -1
- teradataml/common/aed_utils.py +3 -2
- teradataml/common/constants.py +48 -6
- teradataml/common/deprecations.py +13 -7
- teradataml/common/garbagecollector.py +156 -120
- teradataml/common/messagecodes.py +6 -1
- teradataml/common/messages.py +3 -1
- teradataml/common/sqlbundle.py +1 -1
- teradataml/common/utils.py +103 -11
- teradataml/common/wrapper_utils.py +1 -1
- teradataml/context/context.py +121 -31
- teradataml/data/advertising.csv +201 -0
- teradataml/data/bank_marketing.csv +11163 -0
- teradataml/data/bike_sharing.csv +732 -0
- teradataml/data/boston2cols.csv +721 -0
- teradataml/data/breast_cancer.csv +570 -0
- teradataml/data/complaints_test_tokenized.csv +353 -0
- teradataml/data/complaints_tokens_model.csv +348 -0
- teradataml/data/covid_confirm_sd.csv +83 -0
- teradataml/data/customer_segmentation_test.csv +2628 -0
- teradataml/data/customer_segmentation_train.csv +8069 -0
- teradataml/data/dataframe_example.json +10 -0
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +3 -1
- teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +6 -0
- teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +5 -1
- teradataml/data/docs/sqle/docs_17_20/ANOVA.py +61 -1
- teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
- teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/FTest.py +105 -26
- teradataml/data/docs/sqle/docs_17_20/GLM.py +162 -1
- teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +5 -3
- teradataml/data/docs/sqle/docs_17_20/KMeans.py +48 -1
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
- teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +5 -0
- teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +6 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
- teradataml/data/docs/sqle/docs_17_20/ROC.py +3 -2
- teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +13 -2
- teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +119 -1
- teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +93 -1
- teradataml/data/docs/sqle/docs_17_20/Shap.py +197 -0
- teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +163 -1
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
- teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
- teradataml/data/docs/sqle/docs_17_20/XGBoost.py +12 -4
- teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +7 -1
- teradataml/data/docs/sqle/docs_17_20/ZTest.py +72 -7
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
- teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
- teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
- teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
- teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
- teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
- teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
- teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
- teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
- teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
- teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
- teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
- teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
- teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
- teradataml/data/dwt2d_dataTable.csv +65 -0
- teradataml/data/dwt_dataTable.csv +8 -0
- teradataml/data/dwt_filterTable.csv +3 -0
- teradataml/data/finance_data4.csv +13 -0
- teradataml/data/glm_example.json +28 -1
- teradataml/data/grocery_transaction.csv +19 -0
- teradataml/data/housing_train_segment.csv +201 -0
- teradataml/data/idwt2d_dataTable.csv +5 -0
- teradataml/data/idwt_dataTable.csv +8 -0
- teradataml/data/idwt_filterTable.csv +3 -0
- teradataml/data/insect2Cols.csv +61 -0
- teradataml/data/interval_data.csv +5 -0
- teradataml/data/jsons/paired_functions.json +14 -0
- teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +99 -27
- teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
- teradataml/data/jsons/sqle/17.20/TD_FTest.json +166 -83
- teradataml/data/jsons/sqle/17.20/TD_GLM.json +90 -14
- teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +48 -5
- teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +5 -3
- teradataml/data/jsons/sqle/17.20/TD_KMeans.json +31 -11
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
- teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +3 -2
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
- teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
- teradataml/data/jsons/sqle/17.20/TD_ROC.json +2 -1
- teradataml/data/jsons/sqle/17.20/TD_SVM.json +16 -16
- teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +19 -1
- teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +168 -15
- teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +50 -1
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
- teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
- teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
- teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +25 -7
- teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +17 -4
- teradataml/data/jsons/sqle/17.20/TD_ZTest.json +157 -80
- teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
- teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
- teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
- teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
- teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
- teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
- teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
- teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
- teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
- teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
- teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
- teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
- teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
- teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
- teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
- teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
- teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
- teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +208 -0
- teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
- teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
- teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
- teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +400 -0
- teradataml/data/kmeans_example.json +5 -0
- teradataml/data/kmeans_table.csv +10 -0
- teradataml/data/load_example_data.py +8 -2
- teradataml/data/naivebayestextclassifier_example.json +1 -1
- teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
- teradataml/data/onehot_encoder_train.csv +4 -0
- teradataml/data/openml_example.json +29 -0
- teradataml/data/peppers.png +0 -0
- teradataml/data/real_values.csv +14 -0
- teradataml/data/sax_example.json +8 -0
- teradataml/data/scale_attributes.csv +3 -0
- teradataml/data/scale_example.json +52 -1
- teradataml/data/scale_input_part_sparse.csv +31 -0
- teradataml/data/scale_input_partitioned.csv +16 -0
- teradataml/data/scale_input_sparse.csv +11 -0
- teradataml/data/scale_parameters.csv +3 -0
- teradataml/data/scripts/deploy_script.py +21 -2
- teradataml/data/scripts/sklearn/sklearn_fit.py +40 -37
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +22 -30
- teradataml/data/scripts/sklearn/sklearn_function.template +42 -24
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +23 -33
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +19 -28
- teradataml/data/scripts/sklearn/sklearn_score.py +32 -32
- teradataml/data/scripts/sklearn/sklearn_transform.py +85 -42
- teradataml/data/star_pivot.csv +8 -0
- teradataml/data/templates/open_source_ml.json +2 -1
- teradataml/data/teradataml_example.json +97 -1
- teradataml/data/timestamp_data.csv +4 -0
- teradataml/data/titanic_dataset_unpivoted.csv +19 -0
- teradataml/data/uaf_example.json +55 -1
- teradataml/data/unpivot_example.json +15 -0
- teradataml/data/url_data.csv +9 -0
- teradataml/data/windowdfft.csv +16 -0
- teradataml/data/ztest_example.json +16 -0
- teradataml/dataframe/copy_to.py +9 -4
- teradataml/dataframe/data_transfer.py +125 -64
- teradataml/dataframe/dataframe.py +575 -57
- teradataml/dataframe/dataframe_utils.py +47 -9
- teradataml/dataframe/fastload.py +273 -90
- teradataml/dataframe/functions.py +339 -0
- teradataml/dataframe/row.py +160 -0
- teradataml/dataframe/setop.py +2 -2
- teradataml/dataframe/sql.py +740 -18
- teradataml/dataframe/window.py +1 -1
- teradataml/dbutils/dbutils.py +324 -18
- teradataml/geospatial/geodataframe.py +1 -1
- teradataml/geospatial/geodataframecolumn.py +1 -1
- teradataml/hyperparameter_tuner/optimizer.py +13 -13
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/sklearn/_sklearn_wrapper.py +254 -122
- teradataml/options/__init__.py +16 -5
- teradataml/options/configure.py +39 -6
- teradataml/options/display.py +2 -2
- teradataml/plot/axis.py +4 -4
- teradataml/scriptmgmt/UserEnv.py +26 -19
- teradataml/scriptmgmt/lls_utils.py +120 -16
- teradataml/table_operators/Script.py +4 -5
- teradataml/table_operators/TableOperator.py +160 -26
- teradataml/table_operators/table_operator_util.py +88 -41
- teradataml/table_operators/templates/dataframe_udf.template +63 -0
- teradataml/telemetry_utils/__init__.py +0 -0
- teradataml/telemetry_utils/queryband.py +52 -0
- teradataml/utils/validators.py +41 -3
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/METADATA +191 -6
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/RECORD +263 -185
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/zip-safe +0 -0
|
@@ -143,5 +143,15 @@
|
|
|
143
143
|
"smoker" : "varchar(4)",
|
|
144
144
|
"region" : "varchar(12)",
|
|
145
145
|
"charges" : "real"
|
|
146
|
+
},
|
|
147
|
+
"grocery_transaction":{
|
|
148
|
+
"tranid" : "integer",
|
|
149
|
+
"period" : "varchar(20)",
|
|
150
|
+
"storeid" : "integer",
|
|
151
|
+
"region" : "varchar(20)",
|
|
152
|
+
"item" : "varchar(20)",
|
|
153
|
+
"sku" : "integer",
|
|
154
|
+
"category" : "varchar(20)"
|
|
155
|
+
|
|
146
156
|
}
|
|
147
157
|
}
|
|
@@ -7,7 +7,9 @@ def OneHotEncodingFit(data=None, is_input_dense=None, target_column=None, catego
|
|
|
7
7
|
Such as, target attributes and their categorical values to be encoded and other parameters.
|
|
8
8
|
Output of OneHotEncodingFit() function is used by OneHotEncodingTransform() function for encoding
|
|
9
9
|
the input data. It supports inputs in both sparse and dense format.
|
|
10
|
-
|
|
10
|
+
Note:
|
|
11
|
+
* For input to be considered as sparse input, column names must be provided for
|
|
12
|
+
'data_partition_column' argument.
|
|
11
13
|
|
|
12
14
|
PARAMETERS:
|
|
13
15
|
data:
|
|
@@ -3,6 +3,12 @@ def OneHotEncodingTransform(data=None, object=None, is_input_dense=None, **gener
|
|
|
3
3
|
DESCRIPTION:
|
|
4
4
|
Function encodes specified attributes and categorical values as one-hot numeric vectors,
|
|
5
5
|
using OneHotEncodingFit() function output.
|
|
6
|
+
Notes:
|
|
7
|
+
* In case of sparse input, neither 'data_partition_column' nor
|
|
8
|
+
'object_partition_column' can be used independently.
|
|
9
|
+
* In case of dense input, if 'data_partition_column' is having value
|
|
10
|
+
PartitionKind.ANY, then 'object_partition_column' should have value
|
|
11
|
+
PartitionKind.DIMENSION.
|
|
6
12
|
|
|
7
13
|
|
|
8
14
|
PARAMETERS:
|
|
@@ -5,7 +5,11 @@ def OutlierFilterTransform(data=None, object=None, **generic_arguments):
|
|
|
5
5
|
OutlierFilterTransform() uses the result DataFrame from OutlierFilterFit() function to get
|
|
6
6
|
statistics like median, count of rows, lower percentile and upper percentile for every column
|
|
7
7
|
specified in target columns argument and filters the outliers in the input data.
|
|
8
|
-
|
|
8
|
+
Notes:
|
|
9
|
+
* Partitioning of input data and model is allowed using 'data_partition_column' and
|
|
10
|
+
'object_partition_column' only if 'group_columns' are passed while creating model
|
|
11
|
+
using OutlierFilterFit() function.
|
|
12
|
+
* Neither 'data_partition_column' nor 'object_partition_column' can be used independently.
|
|
9
13
|
|
|
10
14
|
PARAMETERS:
|
|
11
15
|
data:
|
|
@@ -1,4 +1,6 @@
|
|
|
1
|
-
def ANOVA(data=None, group_columns=None, alpha=0.05,
|
|
1
|
+
def ANOVA(data=None, group_columns=None, alpha=0.05, group_name_column=None,
|
|
2
|
+
group_value_column=None, group_names=None, num_groups=None,
|
|
3
|
+
**generic_arguments):
|
|
2
4
|
"""
|
|
3
5
|
DESCRIPTION:
|
|
4
6
|
The ANOVA() function performs one-way ANOVA (Analysis of Variance) on
|
|
@@ -37,6 +39,42 @@ def ANOVA(data=None, group_columns=None, alpha=0.05, **generic_arguments):
|
|
|
37
39
|
Default Value: 0.05
|
|
38
40
|
Types: float
|
|
39
41
|
|
|
42
|
+
group_name_column:
|
|
43
|
+
Optional Argument.
|
|
44
|
+
Specifies the column name in "data" containing the names of the groups
|
|
45
|
+
included in the computation.
|
|
46
|
+
Note:
|
|
47
|
+
* This argument is used when data contains group names in a column
|
|
48
|
+
and group values in another column.
|
|
49
|
+
* This argument must be used in conjunction with "group_value_column".
|
|
50
|
+
Types: str
|
|
51
|
+
|
|
52
|
+
group_value_column:
|
|
53
|
+
Optional Argument.
|
|
54
|
+
Specifies the column name in "data" containing the values for each group member.
|
|
55
|
+
Note:
|
|
56
|
+
* This argument is used when data contains group values in a column
|
|
57
|
+
and group names in another column.
|
|
58
|
+
* This argument must be used in conjunction with "group_name_column".
|
|
59
|
+
Types: str
|
|
60
|
+
|
|
61
|
+
group_names:
|
|
62
|
+
Optional Argument.
|
|
63
|
+
Specifies the names of the groups included in the computation.
|
|
64
|
+
Note:
|
|
65
|
+
* This argument is used when data contains group values in a column
|
|
66
|
+
and group names in another column.
|
|
67
|
+
Types: list of Strings (str)
|
|
68
|
+
|
|
69
|
+
num_groups:
|
|
70
|
+
Optional Argument.
|
|
71
|
+
Specifies the number of different groups in the "data" included
|
|
72
|
+
in the computation.
|
|
73
|
+
Note:
|
|
74
|
+
* This argument is used when data contains group values in a column
|
|
75
|
+
and group names in another column.
|
|
76
|
+
Types: int
|
|
77
|
+
|
|
40
78
|
**generic_arguments:
|
|
41
79
|
Specifies the generic keyword arguments SQLE functions accept. Below
|
|
42
80
|
are the generic keyword arguments:
|
|
@@ -97,9 +135,11 @@ def ANOVA(data=None, group_columns=None, alpha=0.05, **generic_arguments):
|
|
|
97
135
|
|
|
98
136
|
# Load the example data.
|
|
99
137
|
load_example_data("teradataml", ["insect_sprays"])
|
|
138
|
+
load_example_data("ztest", 'insect2Cols')
|
|
100
139
|
|
|
101
140
|
# Create teradataml DataFrame objects.
|
|
102
141
|
insect_sprays = DataFrame.from_table("insect_sprays")
|
|
142
|
+
insect_gp = DataFrame.from_table("insect2Cols")
|
|
103
143
|
|
|
104
144
|
# Check the list of available analytic functions.
|
|
105
145
|
display_analytic_functions()
|
|
@@ -123,4 +163,24 @@ def ANOVA(data=None, group_columns=None, alpha=0.05, **generic_arguments):
|
|
|
123
163
|
# Print the result DataFrame.
|
|
124
164
|
print(ANOVA_out_2.result)
|
|
125
165
|
|
|
166
|
+
# Example 3 : Perform one-way anova analysis on a data set with more
|
|
167
|
+
# than two groups and group_name_column, group_value_column,
|
|
168
|
+
# group_names.
|
|
169
|
+
ANOVA_out_3 = ANOVA(data = insect_gp,
|
|
170
|
+
group_name_column='groupName',
|
|
171
|
+
group_value_column='groupValue',
|
|
172
|
+
group_names=['groupA', 'groupB', 'groupC'])
|
|
173
|
+
|
|
174
|
+
# Print the result DataFrame.
|
|
175
|
+
print(ANOVA_out_3.result)
|
|
176
|
+
|
|
177
|
+
# Example 4 : Perform one-way anova analysis on a data set with more
|
|
178
|
+
# than two groups and num_groups.
|
|
179
|
+
ANOVA_out_4 = ANOVA(data = insect_gp,
|
|
180
|
+
group_name_column='groupName',
|
|
181
|
+
group_value_column='groupValue',
|
|
182
|
+
num_groups=6)
|
|
183
|
+
|
|
184
|
+
# Print the result DataFrame.
|
|
185
|
+
print(ANOVA_out_4.result)
|
|
126
186
|
"""
|
|
@@ -0,0 +1,132 @@
|
|
|
1
|
+
def CFilter(data = None, target_column = None, transaction_id_columns = None,
|
|
2
|
+
partition_columns = None, max_distinct_items = 100,
|
|
3
|
+
**generic_arguments):
|
|
4
|
+
|
|
5
|
+
"""
|
|
6
|
+
DESCRIPTION:
|
|
7
|
+
Function calculates several statistical measures of how likely
|
|
8
|
+
each pair of items is to be purchased together.
|
|
9
|
+
|
|
10
|
+
PARAMETERS:
|
|
11
|
+
data:
|
|
12
|
+
Required Argument.
|
|
13
|
+
Specifies the input teradataml DataFrame.
|
|
14
|
+
Types: teradataml DataFrame
|
|
15
|
+
|
|
16
|
+
target_column:
|
|
17
|
+
Required Argument.
|
|
18
|
+
Specifies name of the column from the "data" containing data for filtration.
|
|
19
|
+
Types: str
|
|
20
|
+
|
|
21
|
+
transaction_id_columns:
|
|
22
|
+
Required Argument.
|
|
23
|
+
Specifies the name of the columns in "data" containing transaction id that defines the groups of items listed
|
|
24
|
+
in the input columns that are purchased together.
|
|
25
|
+
Types: str OR list of Strings (str)
|
|
26
|
+
|
|
27
|
+
partition_columns:
|
|
28
|
+
Optional Argument.
|
|
29
|
+
Specifies the name of the column in "data" to partition the data on.
|
|
30
|
+
Types: str OR list of Strings (str)
|
|
31
|
+
|
|
32
|
+
max_distinct_items:
|
|
33
|
+
Optional Argument.
|
|
34
|
+
Specifies the maximum size of the item set.
|
|
35
|
+
Default Value: 100
|
|
36
|
+
Types: int
|
|
37
|
+
|
|
38
|
+
**generic_arguments:
|
|
39
|
+
Specifies the generic keyword arguments SQLE functions accept. Below
|
|
40
|
+
are the generic keyword arguments:
|
|
41
|
+
persist:
|
|
42
|
+
Optional Argument.
|
|
43
|
+
Specifies whether to persist the results of the
|
|
44
|
+
function in a table or not. When set to True,
|
|
45
|
+
results are persisted in a table; otherwise,
|
|
46
|
+
results are garbage collected at the end of the
|
|
47
|
+
session.
|
|
48
|
+
Default Value: False
|
|
49
|
+
Types: bool
|
|
50
|
+
|
|
51
|
+
volatile:
|
|
52
|
+
Optional Argument.
|
|
53
|
+
Specifies whether to put the results of the
|
|
54
|
+
function in a volatile table or not. When set to
|
|
55
|
+
True, results are stored in a volatile table,
|
|
56
|
+
otherwise not.
|
|
57
|
+
Default Value: False
|
|
58
|
+
Types: bool
|
|
59
|
+
|
|
60
|
+
Function allows the user to partition, hash, order or local
|
|
61
|
+
order the input data. These generic arguments are available
|
|
62
|
+
for each argument that accepts teradataml DataFrame as
|
|
63
|
+
input and can be accessed as:
|
|
64
|
+
* "<input_data_arg_name>_partition_column" accepts str or
|
|
65
|
+
list of str (Strings)
|
|
66
|
+
* "<input_data_arg_name>_hash_column" accepts str or list
|
|
67
|
+
of str (Strings)
|
|
68
|
+
* "<input_data_arg_name>_order_column" accepts str or list
|
|
69
|
+
of str (Strings)
|
|
70
|
+
* "local_order_<input_data_arg_name>" accepts boolean
|
|
71
|
+
Note:
|
|
72
|
+
These generic arguments are supported by teradataml if
|
|
73
|
+
the underlying SQL Engine function supports, else an
|
|
74
|
+
exception is raised.
|
|
75
|
+
|
|
76
|
+
RETURNS:
|
|
77
|
+
Instance of CFilter.
|
|
78
|
+
Output teradataml DataFrames can be accessed using attribute
|
|
79
|
+
references, such as CFilterObj.<attribute_name>.
|
|
80
|
+
Output teradataml DataFrame attribute name is:
|
|
81
|
+
result
|
|
82
|
+
|
|
83
|
+
|
|
84
|
+
RAISES:
|
|
85
|
+
TeradataMlException, TypeError, ValueError
|
|
86
|
+
|
|
87
|
+
|
|
88
|
+
EXAMPLES:
|
|
89
|
+
# Notes:
|
|
90
|
+
# 1. Get the connection to Vantage, before importing the
|
|
91
|
+
# function in user space.
|
|
92
|
+
# 2. User can import the function, if it is available on
|
|
93
|
+
# Vantage user is connected to.
|
|
94
|
+
# 3. To check the list of analytic functions available on
|
|
95
|
+
# Vantage user connected to, use
|
|
96
|
+
# "display_analytic_functions()".
|
|
97
|
+
|
|
98
|
+
# Load the example data.
|
|
99
|
+
load_example_data("dataframe", ["grocery_transaction"])
|
|
100
|
+
|
|
101
|
+
# Create teradataml DataFrame objects.
|
|
102
|
+
df = DataFrame.from_table("grocery_transaction")
|
|
103
|
+
|
|
104
|
+
# Check the list of available analytic functions.
|
|
105
|
+
display_analytic_functions()
|
|
106
|
+
|
|
107
|
+
# Import function CFilter.
|
|
108
|
+
from teradataml import CFilter
|
|
109
|
+
|
|
110
|
+
# Example 1: CFilter function to calculate the statistical measures
|
|
111
|
+
# of how likely each pair of items is to be purchased together, without
|
|
112
|
+
# specifying the partition_columns.
|
|
113
|
+
CFilter_out = CFilter(data=df,
|
|
114
|
+
target_column='item',
|
|
115
|
+
transaction_id_columns = 'tranid',
|
|
116
|
+
max_distinct_items=100)
|
|
117
|
+
|
|
118
|
+
# Print the result DataFrame.
|
|
119
|
+
print(CFilter_out.result)
|
|
120
|
+
|
|
121
|
+
# Example 2: CFilter function to calculate the statistical measures
|
|
122
|
+
# of how likely each pair of items is to be purchased together,
|
|
123
|
+
# specifying the partition_columns.
|
|
124
|
+
CFilter_out2 = CFilter(data=df,
|
|
125
|
+
target_column='item',
|
|
126
|
+
transaction_id_columns = 'tranid',
|
|
127
|
+
partition_columns='storeid',
|
|
128
|
+
max_distinct_items=100)
|
|
129
|
+
|
|
130
|
+
# Print the result DataFrame.
|
|
131
|
+
print(CFilter_out2.result)
|
|
132
|
+
"""
|
|
@@ -22,6 +22,8 @@ def ColumnTransformer(input_data = None, bincode_fit_data = None, function_fit_d
|
|
|
22
22
|
User must create the FIT dataframe before using the function and must be provided in the same order
|
|
23
23
|
as in the training data sequence to transform the dataset. The FIT dataframe can have maximum of
|
|
24
24
|
128 columns.
|
|
25
|
+
Note:
|
|
26
|
+
* ColumnTransformer() function works only with python 3.6 and above.
|
|
25
27
|
|
|
26
28
|
|
|
27
29
|
PARAMETERS:
|
|
@@ -1,12 +1,8 @@
|
|
|
1
|
-
def FTest(data = None, alpha = None,
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
second_sample_column=None,
|
|
7
|
-
df2=2,
|
|
8
|
-
alternate_hypothesis='two-tailed',
|
|
9
|
-
**generic_arguments):
|
|
1
|
+
def FTest(data = None, alpha = 0.05, first_sample_variance=None,
|
|
2
|
+
first_sample_column=None, df1=None, second_sample_variance=None,
|
|
3
|
+
second_sample_column=None, df2=2, alternate_hypothesis='two-tailed',
|
|
4
|
+
sample_name_column=None, sample_value_column=None, first_sample_name=None,
|
|
5
|
+
second_sample_name=None, **generic_arguments):
|
|
10
6
|
"""
|
|
11
7
|
DESCRIPTION:
|
|
12
8
|
The FTest() function performs an F-test, for which the test statistic follows an
|
|
@@ -27,47 +23,72 @@ def FTest(data = None, alpha = None,
|
|
|
27
23
|
|
|
28
24
|
alpha:
|
|
29
25
|
Optional Argument.
|
|
30
|
-
Specifies the probability of rejecting the null
|
|
31
|
-
|
|
32
|
-
|
|
26
|
+
Specifies the probability of rejecting the null
|
|
27
|
+
hypothesis when the null hypothesis is true.
|
|
28
|
+
Note:
|
|
29
|
+
* "alpha" must be a numeric value in the range [0, 1].
|
|
33
30
|
Default Value: 0.05
|
|
34
31
|
Types: float
|
|
35
32
|
|
|
36
33
|
first_sample_column:
|
|
37
|
-
|
|
38
|
-
Specifies the
|
|
39
|
-
|
|
34
|
+
Optional Argument.
|
|
35
|
+
Specifies the first sample column in F-Test.
|
|
36
|
+
Note:
|
|
37
|
+
* This argument must be specified with "first_sample_variance" and "df1"
|
|
38
|
+
or allowed combination is "first_sample_column" with
|
|
39
|
+
"second_sample_variance" and "df2".
|
|
40
|
+
* This argument cannot be used in conjunction with "sample_name_column"
|
|
41
|
+
and "sample_value_column".
|
|
40
42
|
Types: str
|
|
41
43
|
|
|
42
44
|
first_sample_variance:
|
|
43
|
-
|
|
44
|
-
Specifies the
|
|
45
|
+
Optional Argument.
|
|
46
|
+
Specifies the first sample variance.
|
|
47
|
+
Note:
|
|
48
|
+
* This argument must be specified with "first_sample_column" and "df1"
|
|
49
|
+
or other allowed combination is "second_sample_column" with
|
|
50
|
+
"first_sample_variance" and "df1".
|
|
45
51
|
Types: float
|
|
46
52
|
|
|
47
53
|
df1:
|
|
48
|
-
|
|
54
|
+
Optional Argument.
|
|
49
55
|
Specifies the degrees of freedom of the first sample.
|
|
56
|
+
Note:
|
|
57
|
+
* This argument must be specified with "first_sample_column" and
|
|
58
|
+
"first_sample_variance".
|
|
50
59
|
Types: integer
|
|
51
60
|
|
|
52
61
|
second_sample_column:
|
|
53
|
-
|
|
54
|
-
Specifies the
|
|
55
|
-
|
|
62
|
+
Optional Argument.
|
|
63
|
+
Specifies the second sample column in F-Test.
|
|
64
|
+
Note:
|
|
65
|
+
* This argument must be specified with "second_sample_variance" and "df2"
|
|
66
|
+
or allowed combination is "second_sample_column" with "first_sample_variance"
|
|
67
|
+
and "df1".
|
|
68
|
+
* This argument cannot be used in conjunction with "sample_name_column"
|
|
69
|
+
and "sample_value_column".
|
|
56
70
|
Types: str
|
|
57
71
|
|
|
58
72
|
second_sample_variance:
|
|
59
|
-
|
|
60
|
-
Specifies the
|
|
73
|
+
Optional Argument.
|
|
74
|
+
Specifies the second sample variance.
|
|
75
|
+
Note:
|
|
76
|
+
* This argument must be specified with "second_sample_column" and "df2"
|
|
77
|
+
or allowed combination is "first_sample_column" with
|
|
78
|
+
"second_sample_variance" and "df2".
|
|
61
79
|
Types: float
|
|
62
80
|
|
|
63
81
|
df2:
|
|
64
|
-
|
|
65
|
-
Specifies the
|
|
82
|
+
Optional Argument.
|
|
83
|
+
Specifies the degrees of freedom of the second sample.
|
|
84
|
+
Note:
|
|
85
|
+
* This argument must be specified with "second_sample_column" and
|
|
86
|
+
"second_sample_variance".
|
|
66
87
|
Types: integer
|
|
67
88
|
|
|
68
89
|
alternate_hypothesis:
|
|
69
90
|
Optional Argument.
|
|
70
|
-
Specifies the
|
|
91
|
+
Specifies the alternate hypothesis.
|
|
71
92
|
Permitted Values:
|
|
72
93
|
* lower-tailed - Alternate hypothesis (H 1): μ < μ0.
|
|
73
94
|
* upper-tailed - Alternate hypothesis (H 1): μ > μ0.
|
|
@@ -79,6 +100,27 @@ def FTest(data = None, alpha = None,
|
|
|
79
100
|
Default Value: two-tailed
|
|
80
101
|
Types: str
|
|
81
102
|
|
|
103
|
+
sample_name_column:
|
|
104
|
+
Optional Argument.
|
|
105
|
+
Specifies the column name in "data" containing the names of the samples
|
|
106
|
+
included in the F-Test.
|
|
107
|
+
Types: str
|
|
108
|
+
|
|
109
|
+
sample_value_column:
|
|
110
|
+
Optional Argument.
|
|
111
|
+
Specifies the column name in "data" containing the values for each sample member.
|
|
112
|
+
Types: str
|
|
113
|
+
|
|
114
|
+
first_sample_name:
|
|
115
|
+
Optional Argument.
|
|
116
|
+
Specifies the name of the first sample included in the F-Test.
|
|
117
|
+
Types: str
|
|
118
|
+
|
|
119
|
+
second_sample_name:
|
|
120
|
+
Optional Argument.
|
|
121
|
+
Specifies the name of the second sample included in the F-Test.
|
|
122
|
+
Types: str
|
|
123
|
+
|
|
82
124
|
**generic_arguments:
|
|
83
125
|
Specifies the generic keyword arguments SQLE functions accept.
|
|
84
126
|
Below are the generic keyword arguments:
|
|
@@ -129,9 +171,11 @@ def FTest(data = None, alpha = None,
|
|
|
129
171
|
|
|
130
172
|
# Load the example data.
|
|
131
173
|
load_example_data("teradataml", "titanic")
|
|
174
|
+
load_example_data("ztest", 'insect2Cols')
|
|
132
175
|
|
|
133
176
|
# Create teradataml DataFrame object.
|
|
134
177
|
titanic_data = DataFrame.from_table("titanic")
|
|
178
|
+
insect_gp = DataFrame.from_table("insect2Cols")
|
|
135
179
|
|
|
136
180
|
# Check the list of available analytic functions.
|
|
137
181
|
display_analytic_functions()
|
|
@@ -158,4 +202,39 @@ def FTest(data = None, alpha = None,
|
|
|
158
202
|
|
|
159
203
|
# Print the result DataFrame.
|
|
160
204
|
print(obj.result)
|
|
205
|
+
|
|
206
|
+
# Example 3: Run FTest() with sample_name_column, sample_value_column,
|
|
207
|
+
# first_sample_name and second_sample_name.
|
|
208
|
+
obj = FTest(data=insect_gp,
|
|
209
|
+
sample_value_column='groupValue',
|
|
210
|
+
sample_name_column='groupName',
|
|
211
|
+
first_sample_name='groupE',
|
|
212
|
+
second_sample_name='groupC')
|
|
213
|
+
|
|
214
|
+
# Print the result DataFrame.
|
|
215
|
+
print(obj.result)
|
|
216
|
+
|
|
217
|
+
# Example 4: Run FTest() with sample_name_column, sample_value_column,
|
|
218
|
+
# first_sample_name and second_sample_name.
|
|
219
|
+
obj = FTest(data=insect_gp,
|
|
220
|
+
sample_value_column='groupValue',
|
|
221
|
+
sample_name_column='groupName',
|
|
222
|
+
first_sample_name='groupE',
|
|
223
|
+
second_sample_variance=100.0,
|
|
224
|
+
df2=25)
|
|
225
|
+
|
|
226
|
+
# Print the result DataFrame.
|
|
227
|
+
print(obj.result)
|
|
228
|
+
|
|
229
|
+
# Example 5: Run FTest() with sample_name_column, sample_value_column,
|
|
230
|
+
# second_sample_name and first_sample_variance.
|
|
231
|
+
obj = FTest(data=insect_gp,
|
|
232
|
+
sample_value_column='groupValue',
|
|
233
|
+
sample_name_column='groupName',
|
|
234
|
+
second_sample_name='groupC',
|
|
235
|
+
first_sample_variance=85.0,
|
|
236
|
+
df1=19)
|
|
237
|
+
|
|
238
|
+
# Print the result DataFrame.
|
|
239
|
+
print(obj.result)
|
|
161
240
|
"""
|
|
@@ -2,7 +2,9 @@ def GLM(formula=None, data=None, input_columns=None, response_column=None, famil
|
|
|
2
2
|
iter_max=300, batch_size=10, lambda1=0.02, alpha=0.15,
|
|
3
3
|
iter_num_no_change=50, tolerance=0.001, intercept=True, class_weights="0:1.0, 1:1.0",
|
|
4
4
|
learning_rate=None, initial_eta=0.05, decay_rate=0.25, decay_steps=5, momentum=0.0,
|
|
5
|
-
nesterov=True, local_sgd_iterations=0,
|
|
5
|
+
nesterov=True, local_sgd_iterations=0, stepwise_direction=None, max_steps_num=5,
|
|
6
|
+
initial_stepwise_columns=None, attribute_data=None, parameter_data=None, iteration_mode="BATCH",
|
|
7
|
+
partition_column=None, **generic_arguments):
|
|
6
8
|
"""
|
|
7
9
|
DESCRIPTION:
|
|
8
10
|
The generalized linear model (GLM) function performs regression and classification
|
|
@@ -252,6 +254,55 @@ def GLM(formula=None, data=None, input_columns=None, response_column=None, famil
|
|
|
252
254
|
Default Value: 0
|
|
253
255
|
Types: int
|
|
254
256
|
|
|
257
|
+
stepwise_direction:
|
|
258
|
+
Optional Argument.
|
|
259
|
+
Specify the type of stepwise algorithm to be used.
|
|
260
|
+
Permitted Values: 'FORWARD', 'BACKWARD', 'BOTH', 'BIDIRECTIONAL'
|
|
261
|
+
Types: str
|
|
262
|
+
|
|
263
|
+
max_steps_num:
|
|
264
|
+
Optional Argument.
|
|
265
|
+
Specifies the maximum number of steps to be used for the Stepwise Algorithm.
|
|
266
|
+
Note:
|
|
267
|
+
* The "max_steps_num" must be in the range [1, 2147483647].
|
|
268
|
+
Default Value: 5
|
|
269
|
+
Types: int
|
|
270
|
+
|
|
271
|
+
attribute_data:
|
|
272
|
+
Optional Argument.
|
|
273
|
+
Specifies the teradataml DataFrame containing the attribute data.
|
|
274
|
+
Note:
|
|
275
|
+
* This is valid when "data_partition_column" argument is used.
|
|
276
|
+
Types: teradataml DataFrame
|
|
277
|
+
|
|
278
|
+
parameter_data:
|
|
279
|
+
Optional Argument.
|
|
280
|
+
Specifies the teradataml DataFrame containing the parameter data.
|
|
281
|
+
Note:
|
|
282
|
+
* This is valid when "data_partition_column" argument is used.
|
|
283
|
+
Types: teradataml DataFrame
|
|
284
|
+
|
|
285
|
+
iteration_mode:
|
|
286
|
+
Optional Argument.
|
|
287
|
+
Specifies the iteration mode.
|
|
288
|
+
Note:
|
|
289
|
+
* This is valid when "data_partition_column" argument is used.
|
|
290
|
+
Permitted Values: 'BATCH', 'EPOCH'
|
|
291
|
+
Default Value: 'BATCH'
|
|
292
|
+
Types: str
|
|
293
|
+
|
|
294
|
+
partition_column:
|
|
295
|
+
Optional Argument.
|
|
296
|
+
Specifies the column names of "data" on which to partition the input.
|
|
297
|
+
The name should be consistent with the "data_partition_column".
|
|
298
|
+
Note:
|
|
299
|
+
* If the "data_partition_column" is unicode with foreign language characters,
|
|
300
|
+
it is necessary to specify "partition_column" argument.
|
|
301
|
+
* Column range is not supported for "partition_column" argument.
|
|
302
|
+
* This is valid when "data_partition_column" argument is used.
|
|
303
|
+
Types: str
|
|
304
|
+
|
|
305
|
+
|
|
255
306
|
**generic_arguments:
|
|
256
307
|
Specifies the generic keyword arguments SQLE functions accept. Below
|
|
257
308
|
are the generic keyword arguments:
|
|
@@ -377,4 +428,114 @@ def GLM(formula=None, data=None, input_columns=None, response_column=None, famil
|
|
|
377
428
|
# Print the result DataFrame.
|
|
378
429
|
print(GLM_out_2.result)
|
|
379
430
|
print(GLM_out_2.output_data)
|
|
431
|
+
|
|
432
|
+
# Example 3 : Generate generalized linear model(GLM) using stepwise regression algorithm.
|
|
433
|
+
# This example uses the boston dataset and scales the data.
|
|
434
|
+
# Scaled data is used as input data to generate the GLM model.
|
|
435
|
+
# loading the example data
|
|
436
|
+
load_example_data("decisionforest", ["boston"])
|
|
437
|
+
load_example_data('glm', ['housing_train_segment', 'housing_train_parameter', 'housing_train_attribute'])
|
|
438
|
+
|
|
439
|
+
# Create teradataml DataFrame objects.
|
|
440
|
+
boston_df = DataFrame('boston')
|
|
441
|
+
housing_seg = DataFrame('housing_train_segment')
|
|
442
|
+
housing_parameter = DataFrame('housing_train_parameter')
|
|
443
|
+
housing_attribute = DataFrame('housing_train_attribute')
|
|
444
|
+
|
|
445
|
+
# Scaling the data
|
|
446
|
+
# Scale "target_columns" with respect to 'STD' value of the column.
|
|
447
|
+
fit_obj = ScaleFit(data=boston_df,
|
|
448
|
+
target_columns=['crim','zn','indus','chas','nox','rm','age','dis','rad','tax','ptratio','black','lstat',],
|
|
449
|
+
scale_method="STD")
|
|
450
|
+
|
|
451
|
+
# Scale values specified in the input data using the fit data generated by the ScaleFit() function above.
|
|
452
|
+
obj = ScaleTransform(object=fit_obj.output,
|
|
453
|
+
data=boston_df,
|
|
454
|
+
accumulate=["id","medv"])
|
|
455
|
+
|
|
456
|
+
boston = obj.result
|
|
457
|
+
|
|
458
|
+
# Generate generalized linear model(GLM) using stepwise regression algorithm.
|
|
459
|
+
glm_1 = GLM(data=boston,
|
|
460
|
+
input_columns=['indus','chas','nox','rm'],
|
|
461
|
+
response_column='medv',
|
|
462
|
+
family='GAUSSIAN',
|
|
463
|
+
lambda1=0.02,
|
|
464
|
+
alpha=0.33,
|
|
465
|
+
batch_size=10,
|
|
466
|
+
learning_rate='optimal',
|
|
467
|
+
iter_max=36,
|
|
468
|
+
iter_num_no_change=100,
|
|
469
|
+
tolerance=0.0001,
|
|
470
|
+
initial_eta=0.02,
|
|
471
|
+
stepwise_direction='backward',
|
|
472
|
+
max_steps_num=10)
|
|
473
|
+
|
|
474
|
+
# Print the result DataFrame.
|
|
475
|
+
print(glm_1.result)
|
|
476
|
+
|
|
477
|
+
# Example 4 : Generate generalized linear model(GLM) using
|
|
478
|
+
# stepwise regression algorithm with initial_stepwise_columns.
|
|
479
|
+
glm_2 = GLM(data=boston,
|
|
480
|
+
input_columns=['crim','zn','indus','chas','nox','rm','age','dis','rad','tax','ptratio','black','lstat'],
|
|
481
|
+
response_column='medv',
|
|
482
|
+
family='GAUSSIAN',
|
|
483
|
+
lambda1=0.02,
|
|
484
|
+
alpha=0.33,
|
|
485
|
+
batch_size=10,
|
|
486
|
+
learning_rate='optimal',
|
|
487
|
+
iter_max=36,
|
|
488
|
+
iter_num_no_change=100,
|
|
489
|
+
tolerance=0.0001,
|
|
490
|
+
initial_eta=0.02,
|
|
491
|
+
stepwise_direction='bidirectional',
|
|
492
|
+
max_steps_num=10,
|
|
493
|
+
initial_stepwise_columns=['rad','tax']
|
|
494
|
+
)
|
|
495
|
+
|
|
496
|
+
# Print the result DataFrame.
|
|
497
|
+
print(glm_2.result)
|
|
498
|
+
|
|
499
|
+
# Example 5 : Generate generalized linear model(GLM) using partition by key.
|
|
500
|
+
glm_3 = GLM(data=housing_seg,
|
|
501
|
+
input_columns=['bedrooms', 'bathrms', 'stories', 'driveway', 'recroom', 'fullbase', 'gashw', 'airco'],
|
|
502
|
+
response_column='price',
|
|
503
|
+
family='GAUSSIAN',
|
|
504
|
+
batch_size=10,
|
|
505
|
+
iter_max=1000,
|
|
506
|
+
data_partition_column='partition_id'
|
|
507
|
+
)
|
|
508
|
+
|
|
509
|
+
# Print the result DataFrame.
|
|
510
|
+
print(glm_3.result)
|
|
511
|
+
|
|
512
|
+
# Example 6 : Generate generalized linear model(GLM) using partition by key with attribute data.
|
|
513
|
+
glm_4 = GLM(data=housing_seg,
|
|
514
|
+
input_columns=['bedrooms', 'bathrms', 'stories', 'driveway', 'recroom', 'fullbase', 'gashw', 'airco'],
|
|
515
|
+
response_column='price',
|
|
516
|
+
family='GAUSSIAN',
|
|
517
|
+
batch_size=10,
|
|
518
|
+
iter_max=1000,
|
|
519
|
+
data_partition_column='partition_id',
|
|
520
|
+
attribute_data = housing_attribute,
|
|
521
|
+
attribute_data_partition_column = 'partition_id'
|
|
522
|
+
)
|
|
523
|
+
|
|
524
|
+
# Print the result DataFrame.
|
|
525
|
+
print(glm_4.result)
|
|
526
|
+
|
|
527
|
+
# Example 7 : Generate generalized linear model(GLM) using partition by key with parameter data
|
|
528
|
+
glm_5 = GLM(data=housing_seg,
|
|
529
|
+
input_columns=['bedrooms', 'bathrms', 'stories', 'driveway', 'recroom', 'fullbase', 'gashw', 'airco'],
|
|
530
|
+
response_column='homestyle',
|
|
531
|
+
family='binomial',
|
|
532
|
+
iter_max=1000,
|
|
533
|
+
data_partition_column='partition_id',
|
|
534
|
+
parameter_data = housing_parameter,
|
|
535
|
+
parameter_data_partition_column = 'partition_id'
|
|
536
|
+
)
|
|
537
|
+
|
|
538
|
+
# Print the result DataFrame.
|
|
539
|
+
print(glm_5.result)
|
|
540
|
+
|
|
380
541
|
"""
|