PyPI - teradataml - Versions diffs - 20.0.0.4__py3-none-any.whl → 20.0.0.6__py3-none-any.whl - Mend

teradataml 20.0.0.4__py3-none-any.whl → 20.0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of teradataml might be problematic. Click here for more details.

Files changed (131) hide show

teradataml/LICENSE-3RD-PARTY.pdf +0 -0
teradataml/README.md +182 -13
teradataml/__init__.py +2 -1
teradataml/_version.py +2 -2
teradataml/analytics/analytic_function_executor.py +8 -13
teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
teradataml/analytics/sqle/__init__.py +16 -1
teradataml/analytics/utils.py +60 -1
teradataml/automl/__init__.py +290 -106
teradataml/automl/autodataprep/__init__.py +471 -0
teradataml/automl/data_preparation.py +29 -10
teradataml/automl/data_transformation.py +11 -0
teradataml/automl/feature_engineering.py +64 -4
teradataml/automl/feature_exploration.py +639 -25
teradataml/automl/model_training.py +1 -1
teradataml/clients/auth_client.py +12 -8
teradataml/clients/keycloak_client.py +165 -0
teradataml/common/constants.py +71 -26
teradataml/common/exceptions.py +32 -0
teradataml/common/messagecodes.py +28 -0
teradataml/common/messages.py +13 -4
teradataml/common/sqlbundle.py +3 -2
teradataml/common/utils.py +345 -45
teradataml/context/context.py +259 -93
teradataml/data/apriori_example.json +22 -0
teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
teradataml/data/jsons/byom/onnxembeddings.json +1 -0
teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +2 -2
teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +1 -1
teradataml/data/ner_dict.csv +8 -0
teradataml/data/ner_input_eng.csv +7 -0
teradataml/data/ner_rule.csv +5 -0
teradataml/data/pattern_matching_data.csv +11 -0
teradataml/data/pos_input.csv +40 -0
teradataml/data/sdk/modelops/modelops_spec.json +101737 -0
teradataml/data/tdnerextractor_example.json +14 -0
teradataml/data/teradataml_example.json +21 -1
teradataml/data/textmorph_example.json +5 -0
teradataml/data/to_num_data.csv +4 -0
teradataml/data/tochar_data.csv +5 -0
teradataml/data/trans_dense.csv +16 -0
teradataml/data/trans_sparse.csv +55 -0
teradataml/data/url_data.csv +10 -9
teradataml/dataframe/copy_to.py +38 -27
teradataml/dataframe/data_transfer.py +61 -45
teradataml/dataframe/dataframe.py +1110 -132
teradataml/dataframe/dataframe_utils.py +73 -27
teradataml/dataframe/functions.py +1070 -9
teradataml/dataframe/sql.py +750 -959
teradataml/dbutils/dbutils.py +33 -13
teradataml/dbutils/filemgr.py +14 -10
teradataml/hyperparameter_tuner/utils.py +4 -2
teradataml/lib/aed_0_1.dll +0 -0
teradataml/opensource/_base.py +12 -157
teradataml/options/configure.py +24 -9
teradataml/scriptmgmt/UserEnv.py +317 -39
teradataml/scriptmgmt/lls_utils.py +456 -135
teradataml/sdk/README.md +79 -0
teradataml/sdk/__init__.py +4 -0
teradataml/sdk/_auth_modes.py +422 -0
teradataml/sdk/_func_params.py +487 -0
teradataml/sdk/_json_parser.py +453 -0
teradataml/sdk/_openapi_spec_constants.py +249 -0
teradataml/sdk/_utils.py +236 -0
teradataml/sdk/api_client.py +897 -0
teradataml/sdk/constants.py +62 -0
teradataml/sdk/modelops/__init__.py +98 -0
teradataml/sdk/modelops/_client.py +406 -0
teradataml/sdk/modelops/_constants.py +304 -0
teradataml/sdk/modelops/models.py +2308 -0
teradataml/sdk/spinner.py +107 -0
teradataml/store/__init__.py +1 -1
teradataml/table_operators/Apply.py +16 -1
teradataml/table_operators/Script.py +20 -1
teradataml/table_operators/query_generator.py +4 -21
teradataml/table_operators/table_operator_util.py +58 -9
teradataml/utils/dtypes.py +4 -2
teradataml/utils/internal_buffer.py +22 -2
teradataml/utils/utils.py +0 -1
teradataml/utils/validators.py +318 -58
{teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/METADATA +188 -14
{teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/RECORD +131 -84
{teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/WHEEL +0 -0
{teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/top_level.txt +0 -0
{teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/zip-safe +0 -0

teradataml/automl/feature_exploration.py CHANGED Viewed

@@ -13,6 +13,11 @@
 # Function Version: 1.0
 # ##################################################################
+# Python Libraries
+import pandas as pd
+import matplotlib.pyplot as plt
+import seaborn as sns
+import math
 # Teradata libraries
 from teradataml.dataframe.dataframe import DataFrame
@@ -22,6 +27,8 @@ from teradataml import OutlierFilterFit, OutlierFilterTransform
 from teradataml.hyperparameter_tuner.utils import _ProgressBar
 from teradataml.common.messages import Messages, MessageCodes
 from teradataml import display as dp
+from teradataml.utils.validators import _Validators
+from teradataml.common.utils import UtilFuncs
 def _is_terminal():
     """
@@ -54,7 +61,8 @@ class _FeatureExplore:
     def __init__(self,
                 data=None,
                 target_column=None,
-                verbose=0):
+                verbose=0,
+                task_type='regression'):
         """
         DESCRIPTION:
             Internal function initializes the data, target column for feature exploration.
@@ -79,14 +87,25 @@ class _FeatureExplore:
                     * 1: prints the execution steps of AutoML.
                     * 2: prints the intermediate data between the execution of each step of AutoML.
                 Types: int
+            task_type:
+                Optional Argument.
+                Specifies the task type of the data.
+                Default Value: 'regression'
+                Permitted Values:
+                    * 'regression'
+                    * 'classification'
+                Types: str
         """
         self.data = data
         self.target_column = target_column
         self.verbose = verbose
         self.terminal_print = _is_terminal()
         self.style = self._common_style()
+        self.task_type = task_type
-    def _exploration(self):
+    def _exploration(self,
+                     **kwargs):
         """
         DESCRIPTION:
             Internal function performs following operations:
@@ -101,7 +120,9 @@ class _FeatureExplore:
         categorical_columns= []
         date_column_list = []
-        self._display_heading(phase=0)
+        aml_phases = kwargs.get('automl_phases', None)
+        self._display_heading(phase=0,
+                              automl_phases=aml_phases)
         self._display_msg(msg='Feature Exploration started ...')
         # Detecting numerical and categorical column
@@ -227,33 +248,22 @@ class _FeatureExplore:
                               data=gfc_out.result,
                               show_data=True)
-    def _target_column_details(self,
-                               plot_data = None):
+    def _target_column_details(self):
         """
         DESCRIPTION:
             Internal function displays the target column distribution of Target column/ Response column.
         PARAMETERS:
-            plot_data:
-                Required Argument.
-                Specifies the input teradataml DataFrame for plotting distribution.
-                Types: teradataml Dataframe
+            None
         """
         if self._check_visualization_libraries() and not _is_terminal():
-            import matplotlib.pyplot as plt
-            import seaborn as sns
-            if plot_data is None:
-                target_data = self.data.select([self.target_column]).to_pandas()
-            else:
-                target_data = plot_data[[self.target_column]]
+            # Plotting target column distribution
             self._display_msg(msg='\nTarget Column Distribution:',
                               show_data=True)
-            plt.figure(figsize=(8, 6))
-            # Ploting a histogram for target column
-            plt.hist(target_data, bins=10, density=True, edgecolor='black')
-            plt.xlabel(self.target_column)
-            plt.ylabel('Density')
-            plt.show()
+            _FeatureExplore._visualize(data=self.data,
+                                       target_column=self.target_column,
+                                       plot_type=["target"],
+                                       problem_type=self.task_type)
     def _check_visualization_libraries(self):
         """
@@ -308,6 +318,9 @@ class _FeatureExplore:
             Pandas DataFrame containing, column name with outlier percentage.
         """
+        # Removing target column from the list of columns
+        column_list = [col for col in column_list if col != self.target_column]
         # Performing outlier fit on the data for replacing outliers with NULL value
         fit_params = {
             "data" : self.data,
@@ -379,7 +392,8 @@ class _FeatureExplore:
     def _display_heading(self,
                          phase=0,
-                         progress_bar=None):
+                         progress_bar=None,
+                         **kwargs):
         """
         DESCRIPTION:
             Internal function to print the phase of AutoML that
@@ -399,9 +413,14 @@ class _FeatureExplore:
         RETURNS:
             None.
         """
-        # Phases of automl
-        steps = ["1. Feature Exploration ->", " 2. Feature Engineering ->",
+        phases = ["1. Feature Exploration ->", " 2. Feature Engineering ->",
                  " 3. Data Preparation ->", " 4. Model Training & Evaluation"]
+        # Phases of automl
+        if kwargs.get('automl_phases', None) is not None:
+            steps = kwargs.get('automl_phases')
+        else:
+            steps = phases
         # Check verbose > 0
         if self.verbose > 0:
@@ -551,4 +570,599 @@ class _FeatureExplore:
                 # If data is provided
                 if data is not None:
                     # Print the data if terminal_print is True, else display the data
-                    print(data) if self.terminal_print else display(data)
+                    print(data) if self.terminal_print else display(data)
+    @staticmethod
+    def _visualize(data,
+                   target_column,
+                   plot_type=["target"],
+                   length=10,
+                   breadth=8,
+                   max_features=10,
+                   columns=None,
+                   problem_type=None):
+        """
+        DESCRIPTION:
+            Internal function to visualize the data using various plots such as heatmap,
+            pair plot, density, count plot, box plot, and target distribution.
+        PARAMETERS:
+            data:
+                Required Argument.
+                Specifies the input teradataml DataFrame for plotting.
+                Types: teradataml Dataframe
+            target_column:
+                Required Argument.
+                Specifies the name of the target column in "data".
+                Types: str
+            plot_type:
+                Optional Argument.
+                Specifies the type of plot to be displayed.
+                Default Value: "target"
+                Permitted Values:
+                    * "heatmap": Displays a heatmap of feature correlations.
+                    * "pair": Displays a pair plot of features.
+                    * "density": Displays a density plot of features.
+                    * "count": Displays a count plot of categorical features.
+                    * "box": Displays a box plot of numerical features.
+                    * "target": Displays the distribution of the target variable.
+                    * "all": Displays all the plots.
+                Types: str, list of str
+            length:
+                Optional Argument.
+                Specifies the length of the plot.
+                Default Value: 10
+                Types: int
+            breadth:
+                Optional Argument.
+                Specifies the breadth of the plot.
+                Default Value: 8
+                Types: int
+            columns:
+                Optional Argument.
+                Specifies the column names to be used for plotting.
+                Types: str or list of string
+            max_features:
+                Optional Argument.
+                Specifies the maximum number of features to be used for plotting.
+                Default Value: 10
+                Note:
+                    * It applies separately to categorical and numerical features.
+                Types: int
+            problem_type:
+                Optional Argument.
+                Specifies the type of problem.
+                Permitted Values:
+                    * 'regression'
+                    * 'classification'
+                Types: str
+        RETURNS:
+            None
+        RAISES:
+            TeradataMlException, ValueError, TypeError
+        EXAMPLES:
+            >>> _FeatureExplore._visualize(data=data,
+                                           target_column="target",
+                                           plot_type="heatmap",
+                                           length=10,
+                                           breadth=8,
+                                           max_features=10,
+                                           columns=["feature1", "feature2"],
+                                           problem_type="regression")
+        """
+        # Appending arguments to list for validation
+        arg_info_matrix = []
+        arg_info_matrix.append(["data", data, False, (DataFrame)])
+        arg_info_matrix.append(["target_column", target_column, False, (str)])
+        arg_info_matrix.append(["plot_type", plot_type, True, (str, list), True, ["heatmap", "pair", "all",
+                                                                                  "density", "count", "box", "target"]])
+        arg_info_matrix.append(["length", length, True, (int)])
+        arg_info_matrix.append(["breadth", breadth, True, (int)])
+        arg_info_matrix.append(["max_features", max_features, True, (int)])
+        arg_info_matrix.append(["problem_type", problem_type, True, (str), True, ["regression", "classification"]])
+        arg_info_matrix.append(["columns", columns, True, (str, list)])
+        # Validate argument types
+        _Validators._validate_function_arguments(arg_info_matrix)
+        # Validate that data has the required columns
+        _Validators._validate_dataframe_has_argument_columns(target_column, "target_column", data, "data")
+        _Validators._validate_dataframe_has_argument_columns(columns, "columns", data, "data")
+        # Convert data to pandas DataFrame if it's a teradataml DataFrame
+        cols = data.columns
+        data = data.to_pandas().reset_index()
+        # avoiding the index column
+        data = data[cols]
+        available_plots = ["target", "density", "count", "box", "pair",  "heatmap"]
+        # if target_column is str
+        if isinstance(target_column, str):
+            data[target_column] = data[target_column].astype("category").cat.codes
+        if plot_type == "all":
+            plot_type = available_plots
+        else:
+            plot_type = UtilFuncs._as_list(plot_type)
+        # Identify numerical and categorical columns
+        numerical_features = data.select_dtypes(include=['number']).columns.drop(target_column).tolist()
+        categorical_features = data.select_dtypes(include=['object', 'category']).columns.tolist()
+        # Handle selected_columns input
+        if columns:
+            selected_columns = UtilFuncs._as_list(columns)
+            selected_num_features = [col for col in selected_columns if col in numerical_features][:max_features]
+            selected_cat_features = [col for col in selected_columns if col in categorical_features][:max_features]
+        else:
+            # Compute correlation with target and select top correlated numerical features
+            if target_column in data.columns and pd.api.types.is_numeric_dtype(data[target_column]):
+                selected_num_features = (
+                    data[numerical_features]
+                    .corrwith(data[target_column])
+                    .abs()
+                    .nlargest(max_features)
+                    .index.tolist()
+                )
+            else:
+                selected_num_features = numerical_features[:max_features]
+            # Select top categorical features based on appearance
+            selected_cat_features = categorical_features[:max_features]
+        irrelevant_plot = []
+        # Sort plot_type based on the order in available_plots
+        # display univariate plots first, then bivariate, and finally multivariate
+        sorted_plot_type = sorted(plot_type, key=lambda x: available_plots.index(x.lower()))
+        for plot in sorted_plot_type:
+            # Target Distribution
+            if plot.lower() == "target":
+                msg = _FeatureExplore._target_distribution(data=data,
+                                                           target_column=target_column,
+                                                           problem_type=problem_type,
+                                                           length=length,
+                                                           breadth=breadth)
+            # Density Plot (for numerical features) - Grid
+            elif plot.lower() == "density":
+                msg = _FeatureExplore._density_plot(data=data,
+                                                    length=length,
+                                                    breadth=breadth,
+                                                    numerical_features=selected_num_features)
+            # Count Plot (for categorical features) - Grid
+            elif plot.lower() == "count":
+                msg = _FeatureExplore._count_plot(data=data,
+                                                  length=length,
+                                                  breadth=breadth,
+                                                  categorical_features=selected_cat_features)
+            # Box Plot (for numerical features) - Grid
+            elif plot.lower() == "box":
+                msg = _FeatureExplore._box_plot(data=data,
+                                                length=length,
+                                                breadth=breadth,
+                                                numerical_features=selected_num_features)
+            # Scatter Plot / Pair Plot
+            elif plot.lower() == "pair":
+                msg = _FeatureExplore._pair_plot(data=data,
+                                                    target_column=target_column,
+                                                    length=length,
+                                                    breadth=breadth,
+                                                    numerical_features=selected_num_features,
+                                                    categorical_features=selected_cat_features)
+            # Heatmap
+            elif plot.lower() == "heatmap":
+                msg = _FeatureExplore._heatmap(data=data,
+                                               target_column=target_column,
+                                               length=length,
+                                               breadth=breadth,
+                                               numerical_features=selected_num_features)
+            if msg:
+                irrelevant_plot.append(msg)
+        if irrelevant_plot:
+            for msg in irrelevant_plot:
+                print(msg)
+    @staticmethod
+    def _heatmap(data,
+                 target_column,
+                 length=10,
+                 breadth=8,
+                 numerical_features=[]):
+        """
+        DESCRIPTION:
+            Internal function to visualize the data using heatmap.
+        PARAMETERS:
+            data:
+                Required Argument.
+                Specifies the input pandas DataFrame for plotting.
+                Types: pandas Dataframe
+            target_column:
+                Required Argument.
+                Specifies the name of the target column in "data".
+                Types: str
+            length:
+                Optional Argument.
+                Specifies the length of the plot.
+                Default Value: 10
+                Types: int
+            breadth:
+                Optional Argument.
+                Specifies the breadth of the plot.
+                Default Value: 8
+                Types: int
+            numerical_features:
+                Optional Argument.
+                Specifies the list of numerical features to be plotted.
+                Types: list of str
+        RETURNS:
+            str
+        RAISES:
+            None
+        EXAMPLES:
+            >>> _FeatureExplore._heatmap(data=data,
+                                         target_column="target",
+                                         length=10,
+                                         breadth=8,
+                                         numerical_features=["feature1", "feature2"])
+        """
+        if len(numerical_features) >= 1:
+            plt.figure(figsize=(length, breadth))
+            sns.heatmap(data[numerical_features + [target_column]].corr(), annot=True, cmap="coolwarm")
+            plt.title("Feature Correlation Heatmap")
+            plt.show()
+        else:
+            return f"Plot type 'heatmap' is not applicable as no numerical features are available."
+    @staticmethod
+    def _pair_plot(data,
+                      target_column,
+                      length=10,
+                      breadth=8,
+                      numerical_features=[],
+                      categorical_features=[]):
+        """
+        DESCRIPTION:
+            Internal function to visualize the data using pair plot.
+        PARAMETERS:
+            data:
+                Required Argument.
+                Specifies the input pandas DataFrame for plotting.
+                Types: pandas Dataframe
+            target_column:
+                Required Argument.
+                Specifies the name of the target column in "data".
+                Types: str
+            length:
+                Optional Argument.
+                Specifies the length of the plot.
+                Default Value: 10
+                Types: int
+            breadth:
+                Optional Argument.
+                Specifies the breadth of the plot.
+                Default Value: 8
+                Types: int
+            numerical_features:
+                Optional Argument.
+                Specifies the list of numerical features to be plotted.
+                Types: list of str
+            categorical_features:
+                Optional Argument.
+                Specifies the list of categorical features to be plotted.
+                Types: list of str
+        RETURNS:
+            str
+        RAISES:
+            None
+        EXAMPLES:
+            >>> _FeatureExplore._pair_plot(data=data,
+                                              target_column="target",
+                                              length=10,
+                                              breadth=8,
+                                              numerical_features=["feature1", "feature2"])
+        """
+        if len(numerical_features) >= 1:
+            pair = sns.pairplot(data[numerical_features + [target_column]],
+                      hue=target_column if target_column in categorical_features else None)
+            # Add a centered title
+            pair.figure.suptitle("pair Plot", fontsize=16, y=1.02)
+            plt.show()
+        else:
+            return f"Plot type 'pair' is not applicable as no numerical features are available."
+    @staticmethod
+    def _density_plot(data,
+                      length=10,
+                      breadth=8,
+                      numerical_features=[]):
+        """
+        DESCRIPTION:
+            Internal function to visualize the data using density plot.
+        PARAMETERS:
+            data:
+                Required Argument.
+                Specifies the input pandas DataFrame for plotting.
+                Types: pandas Dataframe
+            length:
+                Optional Argument.
+                Specifies the length of the plot.
+                Default Value: 10
+                Types: int
+            breadth:
+                Optional Argument.
+                Specifies the breadth of the plot.
+                Default Value: 8
+                Types: int
+            numerical_features:
+                Optional Argument.
+                Specifies the list of numerical features to be plotted.
+                Types: list of str
+        RETURNS:
+            str
+        RAISES:
+            None
+        EXAMPLES:
+            >>> _FeatureExplore._density_plot(data=data,
+                                              length=10,
+                                              breadth=8,
+                                              numerical_features=["feature1", "feature2"])
+        """
+        if len(numerical_features) >= 1:
+            rows = math.ceil(len(numerical_features) / 3)
+            fig, axes = plt.subplots(rows, 3, figsize=(length, breadth))
+            axes = axes.flatten()
+            fig.suptitle("Density plot", fontsize=14)
+            for i, feature in enumerate(numerical_features):
+                sns.kdeplot(data[feature], fill=True, color="green", alpha=0.6, ax=axes[i])
+            # Hide any empty subplots
+            for i in range(len(numerical_features), len(axes)):
+                axes[i].axis('off')
+            plt.tight_layout()
+            plt.show()
+            return None
+        else:
+           return f"Plot type 'density' is not applicable as no numerical features are available."
+    @staticmethod
+    def _target_distribution(data,
+                             target_column,
+                             problem_type=None,
+                             length=10,
+                             breadth=8):
+        """
+        DESCRIPTION:
+            Function visualizes the target distribution.
+        PARAMETERS:
+            data:
+                Required Argument.
+                Specifies the input pandas DataFrame for plotting.
+                Types: pandas Dataframe
+            target_column:
+                Required Argument.
+                Specifies the name of the target column in "data".
+                Types: str
+            problem_type:
+                Optional Argument.
+                Specifies the type of problem.
+                Permitted Values:
+                    * 'regression'
+                    * 'classification'
+                Types: str
+            length:
+                Optional Argument.
+                Specifies the length of the plot.
+                Default Value: 10
+                Types: int
+            breadth:
+                Optional Argument.
+                Specifies the breadth of the plot.
+                Default Value: 8
+                Types: int
+        """
+        plt.figure(figsize=(length, breadth))
+        # Categorical Target
+        if (problem_type is None and data[target_column].nunique() <= 20) or \
+            (problem_type and problem_type.lower() == 'classification'):
+            sns.countplot(x=target_column,
+                          data=data,
+                          palette="coolwarm",
+                          hue=target_column,
+                          legend=False)
+        else:
+            # Numerical Target
+            sns.histplot(data[target_column], kde=True, color="blue")
+        plt.title("Target Distribution")
+        plt.tight_layout()
+        plt.show()
+    @staticmethod
+    def _count_plot(data,
+                    length=10,
+                    breadth=8,
+                    categorical_features=[]):
+        """
+        DESCRIPTION:
+            Internal function to visualize the data using count plot.
+        PARAMETERS:
+            data:
+                Required Argument.
+                Specifies the input pandas DataFrame for plotting.
+                Types: pandas Dataframe
+            length:
+                Optional Argument.
+                Specifies the length of the plot.
+                Default Value: 10
+                Types: int
+            breadth:
+                Optional Argument.
+                Specifies the breadth of the plot.
+                Default Value: 8
+                Types: int
+            categorical_features:
+                Optional Argument.
+                Specifies the list of categorical features to be plotted.
+                Types: list of str
+        RETURNS:
+            str
+        RAISES:
+            None
+        EXAMPLES:
+            >>> _FeatureExplore._count_plot(data=data,
+                                           length=10,
+                                           breadth=8,
+                                           categorical_features=["feature1", "feature2"])
+        """
+        if len(categorical_features) >= 1:
+            rows = math.ceil(len(categorical_features) / 3)
+            fig, axes = plt.subplots(rows, 3, figsize=(length, rows * 5))
+            axes = axes.flatten()
+            fig.suptitle("Count plot", fontsize=14)
+            for i, feature in enumerate(categorical_features):
+                # Get top 20 most frequent categories
+                top_categories = data[feature].value_counts().nlargest(25)
+                # Plot only top 20 categories
+                sns.barplot(x=top_categories.index,
+                            y=top_categories.values,
+                            hue=top_categories.index,
+                            palette="coolwarm",
+                            legend=False,
+                            ax=axes[i])
+                # Rotate labels for readability
+                axes[i].tick_params(axis='x', rotation=90)
+            # Hide empty subplots
+            for i in range(len(categorical_features), len(axes)):
+                axes[i].axis('off')
+            # Adjust layout spacing
+            plt.subplots_adjust(hspace=1.5, wspace=0.3)
+            plt.show()
+        else:
+            return f"Plot type 'count' is not applicable as no categorical features are available."
+    @staticmethod
+    def _box_plot(data,
+                  length=10,
+                  breadth=8,
+                  numerical_features=[]):
+        """
+        DESCRIPTION:
+            Internal function to visualize the data using box plot.
+        PARAMETERS:
+            data:
+                Required Argument.
+                Specifies the input pandas DataFrame for plotting.
+                Types: pandas Dataframe
+            length:
+                Optional Argument.
+                Specifies the length of the plot.
+                Default Value: 10
+                Types: int
+            breadth:
+                Optional Argument.
+                Specifies the breadth of the plot.
+                Default Value: 8
+                Types: int
+            numerical_features:
+                Optional Argument.
+                Specifies the list of numerical features to be plotted.
+                Types: list of str
+        RETURNS:
+            str
+        RAISES:
+            None
+        EXAMPLES:
+            >>> _FeatureExplore._box_plot(data=data,
+                                          length=10,
+                                          breadth=8,
+                                          numerical_features=["feature1", "feature2"])
+        """
+        if len(numerical_features) >= 1:
+            rows = math.ceil(len(numerical_features) / 3)
+            fig, axes = plt.subplots(rows, 3, figsize=(length, breadth))
+            axes = axes.flatten()
+            fig.suptitle("Box plot", fontsize=14)
+            for i, feature in enumerate(numerical_features):
+                # Removed the hue argument and passed only the feature to x
+                sns.boxplot(y=data[feature], data=data, ax=axes[i], legend=False)
+                # Adjust layout to prevent label overlap
+                plt.tight_layout()
+            # Hide any empty subplots
+            for i in range(len(numerical_features), len(axes)):
+                axes[i].axis('off')
+            plt.show()
+        else:
+            return f"Plot type 'box' is not applicable as no numerical features are available."

teradataml 20.0.0.4__py3-none-any.whl → 20.0.0.6__py3-none-any.whl

Potentially problematic release.

teradataml 20.0.0.4__py3-none-any.whl → 20.0.0.6__py3-none-any.whl