teradataml-20.0.0.3-py3-none-any.whl → teradataml-20.0.0.5-py3-none-any.whl
This diff shows the changes between publicly released versions of the package as they appear in their public registries, and is provided for informational purposes only.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +193 -1
- teradataml/__init__.py +2 -1
- teradataml/_version.py +2 -2
- teradataml/analytics/analytic_function_executor.py +25 -18
- teradataml/analytics/byom/__init__.py +1 -1
- teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
- teradataml/analytics/sqle/__init__.py +20 -2
- teradataml/analytics/utils.py +15 -1
- teradataml/analytics/valib.py +18 -4
- teradataml/automl/__init__.py +341 -112
- teradataml/automl/autodataprep/__init__.py +471 -0
- teradataml/automl/data_preparation.py +84 -42
- teradataml/automl/data_transformation.py +69 -33
- teradataml/automl/feature_engineering.py +76 -9
- teradataml/automl/feature_exploration.py +639 -25
- teradataml/automl/model_training.py +35 -14
- teradataml/clients/auth_client.py +2 -2
- teradataml/common/__init__.py +1 -2
- teradataml/common/constants.py +122 -63
- teradataml/common/messagecodes.py +14 -3
- teradataml/common/messages.py +8 -4
- teradataml/common/sqlbundle.py +40 -10
- teradataml/common/utils.py +366 -74
- teradataml/common/warnings.py +11 -0
- teradataml/context/context.py +348 -86
- teradataml/data/amazon_reviews_25.csv +26 -0
- teradataml/data/apriori_example.json +22 -0
- teradataml/data/byom_example.json +11 -0
- teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
- teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
- teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
- teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
- teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
- teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
- teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
- teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
- teradataml/data/docs/sqle/docs_17_20/Shap.py +28 -6
- teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
- teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
- teradataml/data/hnsw_alter_data.csv +5 -0
- teradataml/data/hnsw_data.csv +10 -0
- teradataml/data/jsons/byom/h2opredict.json +1 -1
- teradataml/data/jsons/byom/onnxembeddings.json +266 -0
- teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
- teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
- teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
- teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
- teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
- teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
- teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
- teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
- teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +2 -2
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +3 -3
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +6 -6
- teradataml/data/ner_dict.csv +8 -0
- teradataml/data/ner_input_eng.csv +7 -0
- teradataml/data/ner_rule.csv +5 -0
- teradataml/data/pos_input.csv +40 -0
- teradataml/data/tdnerextractor_example.json +14 -0
- teradataml/data/teradataml_example.json +21 -0
- teradataml/data/textmorph_example.json +5 -0
- teradataml/data/to_num_data.csv +4 -0
- teradataml/data/tochar_data.csv +5 -0
- teradataml/data/trans_dense.csv +16 -0
- teradataml/data/trans_sparse.csv +55 -0
- teradataml/data/vectordistance_example.json +1 -1
- teradataml/dataframe/copy_to.py +45 -29
- teradataml/dataframe/data_transfer.py +72 -46
- teradataml/dataframe/dataframe.py +642 -166
- teradataml/dataframe/dataframe_utils.py +167 -22
- teradataml/dataframe/functions.py +135 -20
- teradataml/dataframe/setop.py +11 -6
- teradataml/dataframe/sql.py +330 -78
- teradataml/dbutils/dbutils.py +556 -140
- teradataml/dbutils/filemgr.py +14 -10
- teradataml/hyperparameter_tuner/optimizer.py +12 -1
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/{sklearn/_sklearn_wrapper.py → _base.py} +168 -1013
- teradataml/opensource/_class.py +141 -17
- teradataml/opensource/{constants.py → _constants.py} +7 -3
- teradataml/opensource/_lightgbm.py +52 -53
- teradataml/opensource/_sklearn.py +1008 -0
- teradataml/opensource/_wrapper_utils.py +5 -5
- teradataml/options/__init__.py +47 -15
- teradataml/options/configure.py +103 -26
- teradataml/options/display.py +13 -2
- teradataml/plot/axis.py +47 -8
- teradataml/plot/figure.py +33 -0
- teradataml/plot/plot.py +63 -13
- teradataml/scriptmgmt/UserEnv.py +307 -40
- teradataml/scriptmgmt/lls_utils.py +428 -145
- teradataml/store/__init__.py +2 -3
- teradataml/store/feature_store/feature_store.py +102 -7
- teradataml/table_operators/Apply.py +48 -19
- teradataml/table_operators/Script.py +23 -2
- teradataml/table_operators/TableOperator.py +3 -1
- teradataml/table_operators/table_operator_util.py +58 -9
- teradataml/utils/dtypes.py +49 -1
- teradataml/utils/internal_buffer.py +38 -0
- teradataml/utils/validators.py +377 -62
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/METADATA +200 -4
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/RECORD +146 -112
- teradataml/data/SQL_Fundamentals.pdf +0 -0
- teradataml/libaed_0_1.dylib +0 -0
- teradataml/libaed_0_1.so +0 -0
- teradataml/opensource/sklearn/__init__.py +0 -0
- teradataml/store/vector_store/__init__.py +0 -1586
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/zip-safe +0 -0
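A quick way to confirm which of the two releases is installed in an environment (a minimal sketch; it assumes teradataml exposes the conventional __version__ attribute, whose backing module teradataml/_version.py is one of the files changed here):

    import teradataml

    # Print the installed package version; expect "20.0.0.5" after upgrading.
    print(teradataml.__version__)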
--- /dev/null
+++ b/teradataml/data/docs/sqle/docs_17_20/NERExtractor.py
@@ -0,0 +1,121 @@
+def NERExtractor(data=None, user_defined_data=None, rules_data=None, text_column=None,
+                 input_language="EN", show_context=0, accumulate=None,
+                 **generic_arguments):
+    """
+    DESCRIPTION:
+        NERExtractor() performs Named Entity Recognition (NER) on input text
+        according to user-defined dictionary words or regular expression (regex) patterns.
+
+    PARAMETERS:
+        data:
+            Required Argument.
+            Specifies the input teradataml DataFrame.
+            Types: teradataml DataFrame
+
+        user_defined_data:
+            Required Argument.
+            Specifies the teradataml DataFrame which contains user-defined words and the corresponding entity label.
+            Types: teradataml DataFrame
+
+        rules_data:
+            Required Argument.
+            Specifies the teradataml DataFrame which contains user-defined regex patterns and the corresponding entity label.
+            Types: teradataml DataFrame
+
+        text_column:
+            Required Argument.
+            Specifies the name of the teradataml DataFrame column to be used for the NER search.
+            Types: str
+
+        input_language:
+            Optional Argument.
+            Specifies the language of the input text.
+            Default Value: "EN"
+            Types: str
+
+        show_context:
+            Optional Argument.
+            Specifies the number of words before and after the matched entity. If the leading or trailing
+            words are fewer than "show_context", an ellipsis (...) is added. Must be a positive value
+            less than 10.
+            Default Value: 0
+            Types: int
+
+        accumulate:
+            Optional Argument.
+            Specifies the name(s) of teradataml DataFrame column(s) to copy from the input
+            table to the output.
+            Types: str or list of str
+
+        **generic_arguments:
+            Optional Argument.
+            Specifies the generic keyword arguments SQLE functions accept. Below are the generic
+            keyword arguments:
+                persist:
+                    Optional Argument.
+                    Specifies whether to persist the results of the function in a table or not.
+                    When set to True, results are persisted in a table; otherwise, results are
+                    garbage collected at the end of the session.
+                    Default Value: False
+                    Types: bool
+
+                volatile:
+                    Optional Argument.
+                    Specifies whether to put the results of the function in a volatile table or not.
+                    When set to True, results are stored in a volatile table; otherwise not.
+                    Default Value: False
+                    Types: bool
+
+            Function allows the user to partition, hash, order or local order the input
+            data. These generic arguments are available for each argument that accepts a
+            teradataml DataFrame as input and can be accessed as:
+                * "<input_data_arg_name>_partition_column" accepts str or list of str (Strings)
+                * "<input_data_arg_name>_hash_column" accepts str or list of str (Strings)
+                * "<input_data_arg_name>_order_column" accepts str or list of str (Strings)
+                * "local_order_<input_data_arg_name>" accepts boolean
+            Note:
+                These generic arguments are supported by teradataml if the underlying SQLE Engine
+                function supports them; otherwise an exception is raised.
+
+    RETURNS:
+        Instance of NERExtractor.
+        Output teradataml DataFrames can be accessed using attribute references, such as TDNERExtractorObj.<attribute_name>.
+        Output teradataml DataFrame attribute name is:
+            result
+
+    RAISES:
+        TeradataMlException, TypeError, ValueError
+
+    EXAMPLES:
+        # Notes:
+        #     1. Get the connection to Vantage to execute the function.
+        #     2. One must import the required functions mentioned in the example from teradataml.
+        #     3. The function raises an error if it is not supported on the Vantage system the user is connected to.
+
+        # Load the example data.
+        load_example_data("tdnerextractor", ["ner_input_eng", "ner_dict", "ner_rule"])
+
+        # Create teradataml DataFrame objects.
+        df = DataFrame.from_table("ner_input_eng")
+        user_defined_words = DataFrame.from_table("ner_dict")
+        rules = DataFrame.from_table("ner_rule")
+
+        # Check the list of available analytic functions.
+        display_analytic_functions()
+
+        # Import function NERExtractor.
+        from teradataml import NERExtractor
+
+        # Example 1: Perform Named Entity Recognition (NER) using Rules and Dict with Accumulate.
+        NER_out = NERExtractor(data=df,
+                               user_defined_data=user_defined_words,
+                               rules_data=rules,
+                               text_column=["txt"],
+                               input_language="en",
+                               show_context=3,
+                               accumulate=["id"])
+
+        # Print the result DataFrame.
+        print(NER_out.result)
+    """
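The "show_context" windowing described in the docstring is easy to picture with a plain-Python sketch (illustrative only, not teradataml's implementation; context_window is a hypothetical helper):

    # Hypothetical helper mimicking "show_context": keep n words on each side
    # of the matched entity and add "..." where the window is clipped.
    def context_window(words, match_idx, n):
        lo, hi = max(0, match_idx - n), min(len(words), match_idx + n + 1)
        window = words[lo:hi]
        if lo > 0:
            window = ["..."] + window
        if hi < len(words):
            window = window + ["..."]
        return " ".join(window)

    print(context_window("Teradata was founded in California".split(), 4, 2))
    # prints: ... founded in California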
--- a/teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py
+++ b/teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py
@@ -33,7 +33,7 @@ def NGramSplitter(data=None, text_column=None, delimiter=" ", grams=None, overla
 
         delimiter:
             Optional Argument.
-            Specifies a character or string that separates words in the input text. The
+            Specifies a character, string, or regular expression that separates words in the input text. The
             default value is the set of all whitespace characters which includes
             the characters for space, tab, newline, carriage return and some
             others.
@@ -66,14 +66,14 @@ def NGramSplitter(data=None, text_column=None, delimiter=" ", grams=None, overla
 
         punctuation:
             Optional Argument.
-            Specifies a string that specifies the punctuation characters for the function
+            Specifies a string or regular expression giving the punctuation characters for the function
             to remove before evaluating the input text.
             Default Value: "`~#^&*()-"
             Types: str
 
         reset:
             Optional Argument.
-            Specifies a string that specifies the character or string that ends a sentence.
+            Specifies a character, string, or regular expression that ends a sentence.
             At the end of a sentence, the function discards any partial n-grams and searches
             for the next n-gram at the beginning of the next sentence. An n-gram
             cannot span two sentences.
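Since "delimiter" (along with "punctuation" and "reset") now accepts a regular expression, a call along these lines becomes possible (a sketch; the DataFrame and column names are illustrative, not from the diff):

    # Illustrative use of the relaxed "delimiter" argument: split on commas,
    # semicolons, or runs of whitespace instead of the default whitespace set.
    from teradataml import NGramSplitter

    ngrams = NGramSplitter(data=df,               # df: any teradataml DataFrame
                           text_column="txt",     # column name is illustrative
                           grams="2",
                           delimiter="[,;\\s]+")  # regular expression
    print(ngrams.result)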
--- /dev/null
+++ b/teradataml/data/docs/sqle/docs_17_20/SMOTE.py
@@ -0,0 +1,212 @@
+def SMOTE(data=None, encoding_data=None, id_column=None,
+          response_column=None, input_columns=None, categorical_columns=None,
+          median_standard_deviation=None, minority_class=None,
+          oversampling_factor=5, sampling_strategy="smote",
+          fill_sampleid=True, noninput_columns_value="sample", n_neighbors=5,
+          seed=None, **generic_arguments):
+    """
+    DESCRIPTION:
+        SMOTE() generates data by oversampling a minority class using the
+        smote, adasyn, borderline-2 or smote-nc algorithms.
+
+
+    PARAMETERS:
+        data:
+            Required Argument.
+            Specifies the input teradataml DataFrame.
+            Types: teradataml DataFrame
+
+        encoding_data:
+            Optional Argument. Required when "sampling_strategy" is set to 'smotenc'.
+            Specifies the teradataml DataFrame containing the ordinal encoding information.
+            Types: teradataml DataFrame
+
+        id_column:
+            Required Argument.
+            Specifies the name of the column in "data" that
+            uniquely identifies a data sample.
+            Types: str
+
+        response_column:
+            Optional Argument.
+            Specifies the name of the column in "data" that contains the
+            numeric value to be used as the response value for a sample.
+            Types: str
+
+        input_columns:
+            Required Argument.
+            Specifies the names of the input columns in "data" for oversampling.
+            Types: str OR list of Strings (str)
+
+        categorical_columns:
+            Optional Argument. Required when "sampling_strategy" is set to 'smotenc'.
+            Specifies the names of the categorical columns in "data" that
+            the function uses for oversampling with smotenc.
+            Types: str OR list of Strings (str)
+
+        median_standard_deviation:
+            Optional Argument. Required when "sampling_strategy" is set to 'smotenc'.
+            Specifies the median of the standard deviations computed over the
+            numerical input columns.
+            Types: float
+
+        minority_class:
+            Required Argument.
+            Specifies the minority class for which synthetic samples need to be
+            generated.
+            Note:
+                * The label for the minority class under the response column must be a numeric integer.
+            Types: str
+
+        oversampling_factor:
+            Optional Argument.
+            Specifies the factor for oversampling the minority class.
+            Default Value: 5
+            Types: float
+
+        sampling_strategy:
+            Optional Argument.
+            Specifies the oversampling algorithm to be used to create synthetic samples.
+            Default Value: "smote"
+            Permitted Values: "smote", "adasyn", "borderline", "smotenc"
+            Types: str
+
+        fill_sampleid:
+            Optional Argument.
+            Specifies whether to include the id of the original observation used
+            to generate each synthetic observation.
+            Default Value: True
+            Types: bool
+
+        noninput_columns_value:
+            Optional Argument.
+            Specifies the value to put in a sample column for columns not
+            specified as input columns.
+            Default Value: "sample"
+            Permitted Values: "sample", "neighbor", "null"
+            Types: str
+
+        n_neighbors:
+            Optional Argument.
+            Specifies the number of nearest neighbors for choosing the sample to
+            be used in oversampling.
+            Default Value: 5
+            Types: int
+
+        seed:
+            Optional Argument.
+            Specifies the random seed the algorithm uses for repeatable results.
+            The function uses the seed for the random interpolation that generates
+            the synthetic samples.
+            Types: int
+
+        **generic_arguments:
+            Specifies the generic keyword arguments SQLE functions accept. Below
+            are the generic keyword arguments:
+                persist:
+                    Optional Argument.
+                    Specifies whether to persist the results of the
+                    function in a table or not. When set to True,
+                    results are persisted in a table; otherwise,
+                    results are garbage collected at the end of the
+                    session.
+                    Default Value: False
+                    Types: bool
+
+                volatile:
+                    Optional Argument.
+                    Specifies whether to put the results of the
+                    function in a volatile table or not. When set to
+                    True, results are stored in a volatile table,
+                    otherwise not.
+                    Default Value: False
+                    Types: bool
+
+            Function allows the user to partition, hash, order or local
+            order the input data. These generic arguments are available
+            for each argument that accepts a teradataml DataFrame as
+            input and can be accessed as:
+                * "<input_data_arg_name>_partition_column" accepts str or
+                  list of str (Strings)
+                * "<input_data_arg_name>_hash_column" accepts str or list
+                  of str (Strings)
+                * "<input_data_arg_name>_order_column" accepts str or list
+                  of str (Strings)
+                * "local_order_<input_data_arg_name>" accepts boolean
+            Note:
+                These generic arguments are supported by teradataml if
+                the underlying SQL Engine function supports them, else an
+                exception is raised.
+
+    RETURNS:
+        Instance of SMOTE.
+        Output teradataml DataFrames can be accessed using attribute
+        references, such as SMOTEObj.<attribute_name>.
+        Output teradataml DataFrame attribute name is:
+            result
+
+
+    RAISES:
+        TeradataMlException, TypeError, ValueError
+
+
+    EXAMPLES:
+        # Notes:
+        #     1. Get the connection to Vantage before importing the
+        #        function in the user space.
+        #     2. The user can import the function if it is available on
+        #        the Vantage system the user is connected to.
+        #     3. To check the list of analytic functions available on
+        #        the Vantage system the user is connected to, use
+        #        "display_analytic_functions()".
+
+        # Load the example data.
+        load_example_data("dataframe", "iris_test")
+        load_example_data("teradataml", "titanic")
+
+        # Create teradataml DataFrame objects.
+        iris_input = DataFrame.from_table("iris_test").iloc[:25]
+        titanic_input = DataFrame("titanic").iloc[:50]
+
+        # Create the encoding DataFrame.
+        encoded_data = OrdinalEncodingFit(data=titanic_input,
+                                          target_column=['sex', 'embarked'],
+                                          approach="AUTO")
+
+        # Check the list of available analytic functions.
+        display_analytic_functions()
+
+        # Import function SMOTE.
+        from teradataml import SMOTE
+
+        # Example 1: Generate synthetic samples using the smote algorithm.
+        smote_out = SMOTE(data=iris_input,
+                          n_neighbors=5,
+                          id_column='id',
+                          minority_class='3',
+                          response_column='species',
+                          input_columns=['sepal_length', 'sepal_width', 'petal_length', 'petal_width'],
+                          oversampling_factor=2,
+                          sampling_strategy='smote',
+                          seed=10)
+
+        # Print the result DataFrame.
+        print(smote_out.result)
+
+        # Example 2: Generate synthetic samples using the smotenc algorithm with categorical columns.
+        smote_out2 = SMOTE(data=titanic_input,
+                           encoding_data=encoded_data.result,
+                           id_column='passenger',
+                           response_column='survived',
+                           input_columns=['parch', 'age', 'sibsp'],
+                           categorical_columns=['sex', 'embarked'],
+                           median_standard_deviation=31.47806044604718,
+                           minority_class='1',
+                           oversampling_factor=5,
+                           sampling_strategy="smotenc",
+                           noninput_columns_value="null",
+                           n_neighbors=5)
+
+        # Print the result DataFrame.
+        print(smote_out2.result)
+    """
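For readers unfamiliar with the algorithm family named in the DESCRIPTION: classic SMOTE places each synthetic point on the line segment between a minority sample and one of its nearest minority-class neighbors, and the "seed" argument controls exactly this kind of random interpolation. A NumPy sketch of the textbook step (illustrative, not the Vantage implementation):

    import numpy as np

    # Textbook SMOTE interpolation step (illustrative values).
    rng = np.random.default_rng(seed=10)        # mirrors the "seed" argument
    x = np.array([5.1, 3.5, 1.4, 0.2])          # a minority-class sample
    neighbor = np.array([4.9, 3.0, 1.4, 0.1])   # one of its n_neighbors
    gap = rng.random()                          # uniform draw in [0, 1)
    synthetic = x + gap * (neighbor - x)        # lies between x and neighbor
    print(synthetic)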
--- a/teradataml/data/docs/sqle/docs_17_20/Shap.py
+++ b/teradataml/data/docs/sqle/docs_17_20/Shap.py
@@ -1,4 +1,4 @@
-def Shap(data = None, object = None, id_column=None, training_function = "TD_GLM",
+def Shap(data = None, object = None, id_column=None, training_function = None,
          model_type = "Regression", input_columns = None, detailed = False,
          accumulate = None, num_parallel_trees = 1000, num_boost_rounds = 10,
          **generic_arguments):
@@ -29,7 +29,6 @@ def Shap(data = None, object = None, id_column=None, training_function = "TD_GLM
         training_function:
             Required Argument.
             Specifies the model type name.
-            Default Value: "TD_GLM"
             Permitted Values: TD_GLM, TD_DECISIONFOREST, TD_XGBOOST
             Types: str
 
@@ -50,6 +49,9 @@ def Shap(data = None, object = None, id_column=None, training_function = "TD_GLM
             Optional Argument.
             Specifies whether to output detailed shap information about the
             forest trees.
+            Note:
+                * It is only supported for the "TD_XGBOOST" and "TD_DECISIONFOREST"
+                  training functions.
             Default Value: False
             Types: bool
 
@@ -151,10 +153,10 @@ def Shap(data = None, object = None, id_column=None, training_function = "TD_GLM
 
         # Example 1: Shap for classification model.
         XGBoost_out = XGBoost(data=iris_input,
-
-
-
-
+                              input_columns=['sepal_length', 'sepal_width', 'petal_length', 'petal_width'],
+                              response_column='species',
+                              model_type='Classification',
+                              iter_num=25)
 
         Shap_out = Shap(data=iris_input,
                         object=XGBoost_out.result,
@@ -200,4 +202,24 @@ def Shap(data = None, object = None, id_column=None, training_function = "TD_GLM
 
         # Print the result DataFrame.
         print(Shap_out2.output_data)
+
+        # Example 3: Shap for GLM model.
+        from teradataml import GLM
+        GLM_out = GLM(data=transform_obj.result,
+                      input_columns=['MedInc', 'HouseAge', 'AveRooms',
+                                     'AveBedrms', 'Population', 'AveOccup',
+                                     'Latitude', 'Longitude'],
+                      response_column="MedHouseVal",
+                      family="GAUSSIAN")
+
+        Shap_out3 = Shap(data=transform_obj.result,
+                         object=GLM_out.result,
+                         id_column='id',
+                         training_function="TD_GLM",
+                         model_type="Regression",
+                         input_columns=['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms', 'Population', 'AveOccup', 'Latitude', 'Longitude'],
+                         detailed=False)
+
+        # Print the result DataFrame.
+        print(Shap_out3.output_data)
     """
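The net effect of the signature change is that "training_function" no longer defaults to "TD_GLM": callers that previously omitted it must now pass it explicitly. A migration sketch reusing the names from Example 1 above:

    # Before 20.0.0.5 this call could omit training_function; now it is
    # required (names reused from Example 1 of the docstring).
    Shap_out = Shap(data=iris_input,
                    object=XGBoost_out.result,
                    id_column='id',
                    training_function='TD_XGBOOST',   # must be stated explicitly
                    model_type='Classification',
                    input_columns=['sepal_length', 'sepal_width',
                                   'petal_length', 'petal_width'])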
--- /dev/null
+++ b/teradataml/data/docs/sqle/docs_17_20/TextMorph.py
@@ -0,0 +1,119 @@
+def TextMorph(data=None, word_column=None, pos=None,
+              single_output=False, postag_column=None,
+              accumulate=None, **generic_arguments):
+    """
+    DESCRIPTION:
+        TextMorph() generates morphs of the given words in the input dataset.
+
+    PARAMETERS:
+        data:
+            Required Argument.
+            Specifies the input teradataml DataFrame.
+            Types: teradataml DataFrame
+
+        word_column:
+            Required Argument.
+            Specifies the name of the input column that contains the words for which morphs are to be generated.
+            Types: str
+
+        pos:
+            Optional Argument.
+            Specifies the part of speech (POS) to output.
+            Permitted Values: "NOUN", "VERB", "ADV", "ADJ"
+            Types: str or list of str
+
+        single_output:
+            Optional Argument.
+            Specifies whether to output only one morph for each word. If set to False,
+            the function outputs all morphs for each word.
+            Default Value: False
+            Types: bool
+
+        postag_column:
+            Optional Argument.
+            Specifies the name of the column in "data" that contains the part-of-speech (POS)
+            tags of the words, as output by the TD_POSTagger function.
+            Types: str
+
+        accumulate:
+            Optional Argument.
+            Specifies the names of the input columns to copy to the output table.
+            Types: str or list of str
+
+        **generic_arguments:
+            Optional Argument.
+            Specifies the generic keyword arguments SQLE functions accept. Below are the generic
+            keyword arguments:
+                persist:
+                    Optional Argument.
+                    Specifies whether to persist the results of the function in a table or not.
+                    When set to True, results are persisted in a table; otherwise, results are
+                    garbage collected at the end of the session.
+                    Default Value: False
+                    Types: bool
+
+                volatile:
+                    Optional Argument.
+                    Specifies whether to put the results of the function in a volatile table or not.
+                    When set to True, results are stored in a volatile table; otherwise not.
+                    Default Value: False
+                    Types: bool
+
+            Function allows the user to partition, hash, order or local order the input
+            data. These generic arguments are available for each argument that accepts a
+            teradataml DataFrame as input and can be accessed as:
+                * "<input_data_arg_name>_partition_column" accepts str or list of str (Strings)
+                * "<input_data_arg_name>_hash_column" accepts str or list of str (Strings)
+                * "<input_data_arg_name>_order_column" accepts str or list of str (Strings)
+                * "local_order_<input_data_arg_name>" accepts boolean
+            Note:
+                These generic arguments are supported by teradataml if the underlying SQLE Engine
+                function supports them; otherwise an exception is raised.
+
+    RETURNS:
+        Instance of TextMorph.
+        Output teradataml DataFrames can be accessed using attribute references, such as TDTextMorphObj.<attribute_name>.
+        Output teradataml DataFrame attribute name is:
+            result
+
+    RAISES:
+        TeradataMlException, TypeError, ValueError
+
+    EXAMPLES:
+        # Notes:
+        #     1. Get the connection to Vantage to execute the function.
+        #     2. One must import the required functions mentioned in the example from teradataml.
+        #     3. The function raises an error if it is not supported on the Vantage system the user is connected to.
+
+        # Load the example data.
+        load_example_data("textmorph", ["words_input", "pos_input"])
+
+        # Create teradataml DataFrame objects.
+        data1 = DataFrame.from_table("words_input")
+        data2 = DataFrame.from_table("pos_input")
+
+        # Check the list of available analytic functions.
+        display_analytic_functions()
+
+        # Import function TextMorph.
+        from teradataml import TextMorph
+
+        # Example 1: Generate morphs for words in the input dataset.
+        TextMorph_out = TextMorph(data=data1,
+                                  word_column="data2",
+                                  pos=["noun", "verb"],
+                                  single_output=True,
+                                  accumulate=["id"])
+
+        # Print the result DataFrame.
+        print(TextMorph_out.result)
+
+        # Example 2: Generate morphs for words in the input dataset with POS tags.
+        TextMorph_pos = TextMorph(data=data2,
+                                  word_column="word",
+                                  postag_column="pos_tag",
+                                  accumulate=["id", "pos_tag"])
+
+        # Print the result DataFrame.
+        print(TextMorph_pos.result)
+    """
--- a/teradataml/data/docs/sqle/docs_17_20/TextParser.py
+++ b/teradataml/data/docs/sqle/docs_17_20/TextParser.py
@@ -1,6 +1,9 @@
-def TextParser(data=None, object=None, text_column=None,
-
-
+def TextParser(data=None, object=None, text_column=None, enforce_token_limit=False,
+               convert_to_lowercase=True, stem_tokens=False, remove_stopwords=False,
+               accumulate=None, delimiter=" \t\n\f\r", delimiter_regex=None,
+               punctuation="!#$%&()*+,-./:;?@\^_`{|}~", token_col_name=None,
+               doc_id_column=None, list_positions=False, token_frequency=False,
+               output_by_word=True, **generic_arguments):
     """
     DESCRIPTION:
         The TextParser() function can parse text and perform the following operations:
@@ -38,6 +41,13 @@ def TextParser(data=None, object=None, text_column=None, convert_to_lowercase=Tr
             Specifies the name of the input data column whose contents are to be tokenized.
             Types: str
 
+        enforce_token_limit:
+            Optional Argument.
+            Specifies whether to raise an informative error when a token larger than
+            64K/32K is found, instead of silently discarding such tokens.
+            Default Value: False
+            Types: bool
+
         convert_to_lowercase:
             Optional Argument.
             Specifies whether to convert the text in "text_column" to lowercase.
@@ -71,6 +81,11 @@ def TextParser(data=None, object=None, text_column=None, convert_to_lowercase=Tr
             Default Value: " \\t\\n\\f\\r"
             Types: str
 
+        delimiter_regex:
+            Optional Argument.
+            Specifies a Perl Compatible regular expression that represents the word delimiter.
+            Types: str
+
         punctuation:
             Optional Argument.
             Specifies the punctuation characters to replace with a space in the input text.
@@ -83,6 +98,29 @@ def TextParser(data=None, object=None, text_column=None, convert_to_lowercase=Tr
             the text of the specified column in the "text_column" element.
             Types: str
 
+        doc_id_column:
+            Optional Argument.
+            Specifies the name of the column that uniquely identifies a row in the input table.
+            Types: str
+
+        list_positions:
+            Optional Argument.
+            Specifies whether to output the positions of a word in list form.
+            Default Value: False
+            Types: bool
+
+        token_frequency:
+            Optional Argument.
+            Specifies whether to output the frequency of each token.
+            Default Value: False
+            Types: bool
+
+        output_by_word:
+            Optional Argument.
+            Specifies whether to output each token in a separate row or all tokens in one row.
+            Default Value: True
+            Types: bool
+
         **generic_arguments:
             Specifies the generic keyword arguments SQLE functions accept. Below
             are the generic keyword arguments:
@@ -170,4 +208,17 @@ def TextParser(data=None, object=None, text_column=None, convert_to_lowercase=Tr
 
         # Print the result DataFrame.
         print(TextParser_out.result)
+
+        # Example 3: Tokenize words in the "text_data" column using a delimiter regex,
+        # convert tokens to lowercase, and output token positions in list format.
+        TextParser_out = TextParser(data=complaints,
+                                    text_column="text_data",
+                                    doc_id_column="doc_id",
+                                    delimiter_regex="[ \t\f\r\n]+",
+                                    list_positions=True,
+                                    convert_to_lowercase=True,
+                                    output_by_word=False)
+
+        # Print the result DataFrame.
+        print(TextParser_out.result)
     """
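The added Example 3 exercises "delimiter_regex"; the other new flags compose the same way. An extra sketch (not part of the package docs) that reuses Example 3's table and adds per-token frequencies:

    # Hypothetical follow-on to Example 3: emit one row per token together
    # with its frequency within the document.
    tp_freq = TextParser(data=complaints,
                         text_column="text_data",
                         doc_id_column="doc_id",
                         token_frequency=True,    # new in this release
                         output_by_word=True)
    print(tp_freq.result)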
--- a/teradataml/data/docs/uaf/docs_17_20/ACF.py
+++ b/teradataml/data/docs/uaf/docs_17_20/ACF.py
@@ -94,7 +94,7 @@ def ACF(data=None, data_filter_expr=None, max_lags=None,
             Default behavior when "alpha" is omitted or not a positive
             float:
                 * The function does not return confidence intervals.
-            Types: float
+            Types: int OR float
 
         **generic_arguments:
             Specifies the generic keyword arguments of UAF functions.
--- a/teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py
+++ b/teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py
@@ -169,7 +169,7 @@ def ArimaEstimate(data1=None, data1_filter_expr=None, data2=None,
             at the end to specify the intercept coefficient initial
             value, then the formula is as follows:
                 p+q+P+Q+constant
-            Types: float, list of float
+            Types: int, list of int, float, list of float
 
         fixed:
             Optional Argument.
@@ -183,7 +183,7 @@ def ArimaEstimate(data1=None, data1_filter_expr=None, data2=None,
             at the end to specify the intercept coefficient initial
             value, then the formula is as follows:
                 p+q+P+Q+constant
-            Types: float, list of float
+            Types: int, list of int, float, list of float
 
         constant:
             Optional Argument.
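The "p+q+P+Q+constant" length rule quoted in both hunks is easy to check by hand, and with this release the list entries may be ints as well as floats. A worked illustration (the list name "init" is assumed from the docstring context, not shown in the diff):

    # Worked check of the "p+q+P+Q+constant" rule for a seasonal
    # ARIMA(p,d,q)(P,D,Q) spec with an intercept; ints now accepted.
    p, q, P, Q = 1, 1, 1, 1
    init = [0, 0, 0, 0, 0]                  # one entry per coefficient
    assert len(init) == p + q + P + Q + 1   # +1 for the intercept/constant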
--- a/teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py
+++ b/teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py
@@ -95,7 +95,7 @@ def ArimaXEstimate(data1=None, data1_filter_expr=None, data2=None,
             MA coefficients, the seasonal SAR regression
             coefficients and the SMA coefficients. The formula is
             as follows: 'p+q+P+Q+CONSTANT-length-init-list'
-            Types: float, list of float
+            Types: int, list of int, float, list of float
 
         fixed:
             Optional Argument.
@@ -107,7 +107,7 @@ def ArimaXEstimate(data1=None, data1_filter_expr=None, data2=None,
             If an intercept is needed, one more value is added at
             the end to specify the intercept coefficient initial value.
             The formula is as follows: 'p+q+P+Q+CONSTANT-length-fixed-list'
-            Types: float, list of float
+            Types: int, list of int, float, list of float
 
         constant:
             Optional Argument.
|