teradataml 20.0.0.6__py3-none-any.whl → 20.0.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- teradataml/README.md +210 -0
- teradataml/__init__.py +1 -1
- teradataml/_version.py +1 -1
- teradataml/analytics/analytic_function_executor.py +162 -76
- teradataml/analytics/byom/__init__.py +1 -1
- teradataml/analytics/json_parser/__init__.py +2 -0
- teradataml/analytics/json_parser/analytic_functions_argument.py +95 -2
- teradataml/analytics/json_parser/metadata.py +22 -4
- teradataml/analytics/sqle/DecisionTreePredict.py +3 -2
- teradataml/analytics/sqle/NaiveBayesPredict.py +3 -2
- teradataml/analytics/sqle/__init__.py +3 -0
- teradataml/analytics/utils.py +4 -1
- teradataml/automl/__init__.py +2369 -464
- teradataml/automl/autodataprep/__init__.py +15 -0
- teradataml/automl/custom_json_utils.py +184 -112
- teradataml/automl/data_preparation.py +113 -58
- teradataml/automl/data_transformation.py +154 -53
- teradataml/automl/feature_engineering.py +113 -53
- teradataml/automl/feature_exploration.py +548 -25
- teradataml/automl/model_evaluation.py +260 -32
- teradataml/automl/model_training.py +399 -206
- teradataml/clients/auth_client.py +2 -2
- teradataml/common/aed_utils.py +11 -2
- teradataml/common/bulk_exposed_utils.py +4 -2
- teradataml/common/constants.py +62 -2
- teradataml/common/garbagecollector.py +50 -21
- teradataml/common/messagecodes.py +47 -2
- teradataml/common/messages.py +19 -1
- teradataml/common/sqlbundle.py +23 -6
- teradataml/common/utils.py +116 -10
- teradataml/context/aed_context.py +16 -10
- teradataml/data/Employee.csv +5 -0
- teradataml/data/Employee_Address.csv +4 -0
- teradataml/data/Employee_roles.csv +5 -0
- teradataml/data/JulesBelvezeDummyData.csv +100 -0
- teradataml/data/byom_example.json +5 -0
- teradataml/data/creditcard_data.csv +284618 -0
- teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
- teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +1 -1
- teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
- teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +3 -7
- teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
- teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
- teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
- teradataml/data/load_example_data.py +29 -11
- teradataml/data/payment_fraud_dataset.csv +10001 -0
- teradataml/data/teradataml_example.json +67 -0
- teradataml/dataframe/copy_to.py +714 -54
- teradataml/dataframe/dataframe.py +1153 -33
- teradataml/dataframe/dataframe_utils.py +8 -3
- teradataml/dataframe/functions.py +168 -1
- teradataml/dataframe/setop.py +4 -1
- teradataml/dataframe/sql.py +141 -9
- teradataml/dbutils/dbutils.py +470 -35
- teradataml/dbutils/filemgr.py +1 -1
- teradataml/hyperparameter_tuner/optimizer.py +456 -142
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/lib/libaed_0_1.dylib +0 -0
- teradataml/lib/libaed_0_1.so +0 -0
- teradataml/lib/libaed_0_1_aarch64.so +0 -0
- teradataml/scriptmgmt/UserEnv.py +234 -34
- teradataml/scriptmgmt/lls_utils.py +43 -17
- teradataml/sdk/_json_parser.py +1 -1
- teradataml/sdk/api_client.py +9 -6
- teradataml/sdk/modelops/_client.py +3 -0
- teradataml/series/series.py +12 -7
- teradataml/store/feature_store/constants.py +601 -234
- teradataml/store/feature_store/feature_store.py +2886 -616
- teradataml/store/feature_store/mind_map.py +639 -0
- teradataml/store/feature_store/models.py +5831 -214
- teradataml/store/feature_store/utils.py +390 -0
- teradataml/table_operators/table_operator_util.py +1 -1
- teradataml/table_operators/templates/dataframe_register.template +6 -2
- teradataml/table_operators/templates/dataframe_udf.template +6 -2
- teradataml/utils/docstring.py +527 -0
- teradataml/utils/dtypes.py +93 -0
- teradataml/utils/internal_buffer.py +2 -2
- teradataml/utils/utils.py +41 -2
- teradataml/utils/validators.py +694 -17
- {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/METADATA +213 -2
- {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/RECORD +96 -81
- {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/zip-safe +0 -0
teradataml/dataframe/dataframe_utils.py
CHANGED

@@ -1796,7 +1796,8 @@ class DataFrameUtils():
         fil_nodeid = aed_utils._aed_filter(sel_nodeid, filter_str)
         sel2_nodeid = aed_utils._aed_select(fil_nodeid, sel_cols_str)
         col_names, col_types = __class__._get_column_names_and_types_from_metaexpr(df._metaexpr)
-        new_metaexpr = UtilFuncs._get_metaexpr_using_columns(df._nodeid, zip(col_names, col_types))
+        new_metaexpr = UtilFuncs._get_metaexpr_using_columns(df._nodeid, zip(col_names, col_types),
+                                                             datalake=df._metaexpr.datalake)
         # Call the function from_node from appropriate class either DataFrame or GeoDataFrame
         new_df = df.__class__._from_node(sel2_nodeid, new_metaexpr, df._index_label)
         new_df._orderby = df._orderby

@@ -1855,6 +1856,8 @@ class DataFrameUtils():

         db_schema = UtilFuncs._extract_db_name(tab_name_first)
         db_table_name = UtilFuncs._extract_table_name(tab_name_first)
+        if dfs[0]._metaexpr.datalake:
+            return DataFrame(in_schema(db_schema, db_table_name, dfs[0]._metaexpr.datalake))

         if db_schema:
             return DataFrame(in_schema(db_schema, db_table_name))

@@ -1875,7 +1878,9 @@ class DataFrameUtils():
         db_schema = UtilFuncs._extract_db_name(tab_name_first)
         db_table_name = UtilFuncs._extract_table_name(tab_name_first)

-        if db_schema:
+        if dfs[i]._metaexpr.datalake:
+            parent_df = DataFrame(in_schema(db_schema, db_table_name, dfs[i]._metaexpr.datalake))
+        elif db_schema:
             parent_df = DataFrame(in_schema(db_schema, db_table_name))
         else:
             parent_df = DataFrame(db_table_name)
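The three hunks above thread the source DataFrame's datalake qualifier into every derived _metaexpr, so DataFrames built on datalake (OTF) tables no longer fall back to the default database. A hedged sketch of the user-visible effect; the datalake, database, table, and column names below are placeholders, not shipped examples:

    # Placeholder names: "mydatalake", "sales_db", "sales_otf", "txn_id", "amount".
    from teradataml import DataFrame, in_schema

    # Same three-argument in_schema() call the hunks above use internally.
    df = DataFrame(in_schema("sales_db", "sales_otf", "mydatalake"))

    # Derived DataFrames are rebuilt through
    # UtilFuncs._get_metaexpr_using_columns(..., datalake=df._metaexpr.datalake),
    # so the selection below stays bound to "mydatalake".
    subset = df.select(["txn_id", "amount"])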
@@ -1919,7 +1924,7 @@ class DataFrameUtils():
         # 2. Comma separated parameters enclosed in parentheses
         # 3. Comma separated parameters without parenthesis
         # 4. Remaining string
-        pattern = "([A-Z0-9_]+)(\((.*)\))?(.*)"
+        pattern = r"([A-Z0-9_]+)(\((.*)\))?(.*)"

         m = re.match(pattern, td_type)
         td_str_type = m.group(1)
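The only change here is the r-prefix: `\(` and `\)` are not valid string escape sequences, and recent Python versions flag them (SyntaxWarning since 3.12) unless the pattern is written as a raw string; the same fix is applied to the URL-parsing regexes in sql.py further down. A small standalone illustration of the same pattern; the "DECIMAL(10,2)" input is illustrative only:

    import re

    # The raw string keeps "\(" and "\)" as regex tokens rather than string escapes.
    pattern = r"([A-Z0-9_]+)(\((.*)\))?(.*)"

    m = re.match(pattern, "DECIMAL(10,2)")
    print(m.group(1), m.group(3))   # -> DECIMAL 10,2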
teradataml/dataframe/functions.py
CHANGED

@@ -8,7 +8,7 @@ from teradataml.utils.utils import execute_sql
 import teradatasqlalchemy as tdsqlalchemy
 from teradataml.utils.validators import _Validators
 from teradataml.dataframe.sql import _SQLColumnExpression
-from teradatasqlalchemy import VARCHAR, CLOB, CHAR
+from teradatasqlalchemy import VARCHAR, CLOB, CHAR, DATE, TIMESTAMP
 from teradataml.common.constants import TableOperatorConstants, TeradataConstants, TeradataTypes
 from teradataml.common.utils import UtilFuncs
 from teradataml.dataframe.sql_interfaces import ColumnExpression

@@ -17,6 +17,7 @@ from teradataml.common.exceptions import TeradataMlException
 from teradataml.common.messages import Messages
 from teradataml.common.messagecodes import MessageCodes
 from teradataml.scriptmgmt.lls_utils import get_env
+from sqlalchemy import literal_column

 def udf(user_function=None, returns=VARCHAR(1024), env_name = None, delimiter=',', quotechar=None, debug=False):
     """

@@ -31,6 +32,8 @@ def udf(user_function=None, returns=VARCHAR(1024), env_name = None, delimiter=',
            packages should be inside the user defined function itself.
        3. Do not call a regular function defined outside the udf() from the user defined function.
           The function definition and call must be inside the udf(). Look at Example 9 to understand more.
+       4. One can use the `td_buffer` to cache the data in the user defined function.
+          Look at Example 10 to understand more.

     PARAMETERS:
         user_function:

@@ -321,6 +324,56 @@ def udf(user_function=None, returns=VARCHAR(1024), env_name = None, delimiter=',
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04    2021-10-06
        Red Inc     200.0  150.0  140.0    NaN  17/01/04    2021-10-06
        >>>
+
+       # Example 10: Define a user defined function 'sentiment_analysis' to perform
+       #             sentiment analysis on the 'review' column using VADER.
+       # Note - Cache the model in UDF using 'td_buffer' to avoid loading
+       #        the model every time the UDF is called.
+
+       # Load the data to run the example.
+       >>> from teradataml import *
+       >>> load_example_data("sentimentextractor", "sentiment_extract_input")
+       >>> df = DataFrame("sentiment_extract_input")
+
+       # Create the environment and install the required library.
+       >>> env = create_env('text_analysis', 'python_3.10', 'Test environment for UDF')
+       >>> env.install_lib('vaderSentiment')
+
+       # Create a user defined function to perform sentiment analysis.
+       >>> from teradatasqlalchemy.types import VARCHAR
+       >>> @udf(env_name = env, returns = VARCHAR(80), delimiter='|')
+       ... def sentiment_analysis(txt):
+       ...     if 'vader_model' not in td_buffer:
+       ...         from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
+       ...         td_buffer['vader_model'] = SentimentIntensityAnalyzer()
+       ...     sid_obj = td_buffer['vader_model']
+       ...
+       ...     sentiment_dict = sid_obj.polarity_scores(txt)
+       ...     if sentiment_dict['compound'] >= 0.05 :
+       ...         sentiment = "Positive"
+       ...     elif sentiment_dict['compound'] <= - 0.05 :
+       ...         sentiment = "Negative"
+       ...     else :
+       ...         sentiment = "Neutral"
+       ...     return sentiment
+
+       # Assign the Column Expression returned by user defined function
+       # to the DataFrame.
+       >>> res = df.assign(sentiment = sentiment_analysis('review'))
+       >>> res = res.select(["id", "product", "sentiment"])
+       >>> res
+          id       product  sentiment
+       0   5           gps   Positive
+       1   9    television   Negative
+       2   8        camera   Negative
+       3  10        camera   Negative
+       4   1        camera   Positive
+       5   4           gps   Positive
+       6   2  office suite   Positive
+       7   7           gps   Negative
+       8   6           gps   Negative
+       9   3        camera   Positive
+       >>>
     """

     allowed_datatypes = TeradataTypes.TD_ALL_TYPES.value
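The new note and Example 10 describe td_buffer, a dict-like buffer available inside the UDF that persists between calls so expensive setup runs only once. A minimal sketch of just that caching pattern, assuming the behaviour shown in Example 10; load_model() is a placeholder, not a teradataml or VADER API:

    from teradatasqlalchemy.types import VARCHAR

    @udf(returns=VARCHAR(100))
    def score(txt):
        # Build the costly object only on the first call seen by this worker,
        # then reuse it from td_buffer for every subsequent row.
        if 'model' not in td_buffer:
            td_buffer['model'] = load_model()   # placeholder for any expensive setup
        return str(td_buffer['model'].predict(txt))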
@@ -981,6 +1034,120 @@ def td_range(start, end=None, step=1):
     df = DataFrame.from_query(range_query)
     return df

+def current_date(time_zone='local'):
+    """
+    DESCRIPTION:
+        Returns the current date based on the specified time zone.
+
+    PARAMETERS:
+        time_zone:
+            Optional Argument.
+            Specifies the time zone to use for retrieving the current date.
+            Permitted Values:
+                - "local": Uses the local time zone.
+                - Any valid time zone string.
+            Default Value: "local"
+            Types: str
+
+    RETURNS:
+        ColumnExpression.
+
+    RAISES:
+        None
+
+    EXAMPLES:
+        # Example 1: Add a new column to the DataFrame that contains the
+        #            current date as its value. Consider system specified
+        #            timezone as timezone.
+        >>> from teradataml.dataframe.functions import current_date
+        >>> load_example_data('dataframe', 'sales')
+        >>> df = DataFrame("sales")
+        >>> df.assign(current_date=current_date())
+            accounts    Feb    Jan   Mar   Apr    datetime current_date
+            Alpha Co  210.0  200.0   215   250  04/01/2017     25/05/27
+            Blue Inc   90.0     50    95   101  04/01/2017     25/05/27
+           Jones LLC  200.0    150   140   180  04/01/2017     25/05/27
+          Orange Inc  210.0   None  None   250  04/01/2017     25/05/27
+          Yellow Inc   90.0   None  None  None  04/01/2017     25/05/27
+             Red Inc  200.0    150   140  None  04/01/2017     25/05/27
+
+        # Example 2: Add a new column to the DataFrame that contains the
+        #            current date in a specific time zone as its value.
+        >>> from teradataml.dataframe.functions import current_date
+        >>> load_example_data('dataframe', 'sales')
+        >>> df = DataFrame("sales")
+        >>> df.assign(current_date=current_date("GMT"))
+            accounts    Feb    Jan   Mar   Apr    datetime current_date
+            Alpha Co  210.0  200.0   215   250  04/01/2017     25/05/27
+            Blue Inc   90.0     50    95   101  04/01/2017     25/05/27
+           Jones LLC  200.0    150   140   180  04/01/2017     25/05/27
+          Orange Inc  210.0   None  None   250  04/01/2017     25/05/27
+          Yellow Inc   90.0   None  None  None  04/01/2017     25/05/27
+             Red Inc  200.0    150   140  None  04/01/2017     25/05/27
+
+    """
+    if time_zone == "local":
+        expr_ = "CURRENT_DATE AT LOCAL"
+    else:
+        expr_ = "CURRENT_DATE AT TIME ZONE '{}'".format(time_zone)
+    return _SQLColumnExpression(literal_column(expr_), type = DATE())
+
+def current_timestamp(time_zone='local'):
+    """
+    DESCRIPTION:
+        Returns the current timestamp based on the specified time zone.
+
+    PARAMETERS:
+        time_zone:
+            Optional Argument.
+            Specifies the time zone to use for retrieving the current timestamp.
+            Permitted Values:
+                - "local": Uses the local time zone.
+                - Any valid time zone string.
+            Default Value: "local"
+            Types: str
+
+    RETURNS:
+        ColumnExpression.
+
+    RAISES:
+        None
+
+    EXAMPLES:
+        # Example 1: Assign the current timestamp in the local time zone to a DataFrame column.
+        >>> from teradataml.dataframe.functions import current_timestamp
+        >>> load_example_data('dataframe', 'sales')
+        >>> df = DataFrame("sales")
+        >>> df.assign(current_timestamp = current_timestamp())
+            accounts    Feb   Jan   Mar   Apr    datetime                 current_timestamp
+            Alpha Co  210.0   200   215   250  04/01/2017  2025-05-27 17:36:56.750000+00:00
+            Blue Inc   90.0    50    95   101  04/01/2017  2025-05-27 17:36:56.750000+00:00
+           Jones LLC  200.0   150   140   180  04/01/2017  2025-05-27 17:36:56.750000+00:00
+          Orange Inc  210.0  None  None   250  04/01/2017  2025-05-27 17:36:56.750000+00:00
+          Yellow Inc   90.0  None  None  None  04/01/2017  2025-05-27 17:36:56.750000+00:00
+             Red Inc  200.0   150   140  None  04/01/2017  2025-05-27 17:36:56.750000+00:00
+
+        # Example 2: Assign the current timestamp in a specific time zone to a DataFrame column.
+        >>> from teradataml.dataframe.functions import current_timestamp
+        >>> load_example_data('dataframe', 'sales')
+        >>> df = DataFrame("sales")
+        >>> df.assign(current_timestamp = current_timestamp("GMT+10"))
+            accounts    Feb   Jan   Mar   Apr    datetime                 current_timestamp
+            Blue Inc   90.0    50    95   101  04/01/2017  2025-05-28 03:39:00.790000+10:00
+             Red Inc  200.0   150   140  None  04/01/2017  2025-05-28 03:39:00.790000+10:00
+          Yellow Inc   90.0  None  None  None  04/01/2017  2025-05-28 03:39:00.790000+10:00
+           Jones LLC  200.0   150   140   180  04/01/2017  2025-05-28 03:39:00.790000+10:00
+          Orange Inc  210.0  None  None   250  04/01/2017  2025-05-28 03:39:00.790000+10:00
+            Alpha Co  210.0   200   215   250  04/01/2017  2025-05-28 03:39:00.790000+10:00
+
+    """
+
+    if time_zone == "local":
+        expr_ = "CURRENT_TIMESTAMP AT LOCAL"
+    else:
+        expr_ = "CURRENT_TIMESTAMP AT TIME ZONE '{}'".format(time_zone)
+    return _SQLColumnExpression(literal_column(expr_), type = TIMESTAMP())
+
 def get_formatters(formatter_type = None):
     """
     DESCRIPTION:
teradataml/dataframe/setop.py
CHANGED
@@ -19,6 +19,7 @@ from teradataml.common.utils import UtilFuncs
 from teradataml.dataframe import dataframe
 from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils
 from teradataml.common.aed_utils import AedUtils
+from teradataml.dataframe.sql import _MetaExpression
 from teradataml.utils.validators import _Validators
 from teradatasqlalchemy.dialect import dialect as td_dialect, TeradataTypeCompiler as td_type_compiler
 from teradatasqlalchemy import (GEOMETRY, MBR, MBB)

@@ -346,7 +347,9 @@ def __process_operation(meta_data, is_lazy, setop_type, nodeid, index_label, ind
             break

     # Constructing new Metadata (_metaexpr) without DB; using dummy nodeid and get new metaexpr for nodeid.
-    meta_data = UtilFuncs._get_metaexpr_using_columns(nodeid, column_info) if is_lazy else meta_data
+    meta_data = UtilFuncs._get_metaexpr_using_columns(nodeid, column_info,
+                                                      datalake=meta_data.datalake if isinstance(meta_data, _MetaExpression) else None) if is_lazy \
+        else meta_data

     if is_lazy:
         return getattr(module, class_name)._from_node(nodeid, meta_data, index_label)
teradataml/dataframe/sql.py
CHANGED
@@ -200,11 +200,10 @@ class _MetaExpression(object):
         RAISES:
             AttributeError if attribute can't be found
         """
-
-
-
+        try:
+            res = getattr(self.__t, key)
+        except AttributeError:
             raise AttributeError('Unable to find attribute: %s' % key)
-
         return res

     @property
@@ -501,7 +500,7 @@ class _SQLTableExpression(_PandasTableExpression):
         self.c = [_SQLColumnExpression(c) for c in table.c]

         self._n_rows = 0
-
+        self._datalake = kw.get('datalake', None)

     @property
     def c(self):

@@ -557,6 +556,13 @@ class _SQLTableExpression(_PandasTableExpression):

         self.__t = table

+    @property
+    def datalake(self):
+        """
+        Returns the underlying datalake information
+        """
+        return self._datalake
+
     def __repr__(self):
         """
         Returns a SELECT TOP string representing the underlying table.

@@ -10504,24 +10510,24 @@ class _SQLColumnExpression(_LogicalColumnExpression,
         # teradataml does not support regex grouping hence in some cases first used 'regex_replace' and
         # then 'regex_substr' or vice-versa.
         _part_to_extract_dict = {'HOST': _SQLColumnExpression(
-            func.regexp_replace(func.regexp_substr(self.expression, '//([^/?#]*)'), '(//[^/?#]+@)|(//)|(:\d+)', ''),
+            func.regexp_replace(func.regexp_substr(self.expression, '//([^/?#]*)'), r'(//[^/?#]+@)|(//)|(:\d+)', ''),
             type=VARCHAR()),
             'PATH': _SQLColumnExpression(func.regexp_substr(
                 func.regexp_replace(self.expression, '^(([^:/?#]+):)?(//([^/?#]*))?', ''),
                 '([^?#]*)'), type=VARCHAR()),
             'QUERY': _SQLColumnExpression(func.ltrim(func.regexp_substr(
                 func.regexp_replace(self.expression, '^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)', ''),
-                '\?([^#]*)'), '?'), type=VARCHAR()),
+                r'\?([^#]*)'), '?'), type=VARCHAR()),
             'REF': _SQLColumnExpression(func.ltrim(func.regexp_substr(
                 func.regexp_replace(self.expression,
-                    '^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?', ''),
+                    r'^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?', ''),
                 '(#(.*))'), '#'), type=VARCHAR()),
             'PROTOCOL': _SQLColumnExpression(
                 func.rtrim(func.regexp_substr(self.expression, '^(([^:/?#]+):)'), ':'),
                 type=VARCHAR()),
             'FILE': _SQLColumnExpression(func.regexp_substr(
                 func.regexp_replace(self.expression, '^(([^:/?#]+):)?(//([^/?#]*))?', ''),
-                '([^?#]*)(\?([^#]*))?'), type=VARCHAR()),
+                r'([^?#]*)(\?([^#]*))?'), type=VARCHAR()),
             'AUTHORITY': _SQLColumnExpression(
                 func.ltrim(func.regexp_substr(self.expression, '//([^/?#]*)'), '//'),
                 type=VARCHAR()),

@@ -10770,3 +10776,129 @@ class _SQLColumnExpression(_LogicalColumnExpression,

         """
         return _SQLColumnExpression(literal_column(f"TD_ISFINITE({self.compile()})"), type=INTEGER)
+
+    def between(self, lower, upper):
+        """
+        DESCRIPTION:
+            Evaluates whether the column value is between the lower and upper bounds.
+            The lower and upper bounds are inclusive.
+
+        PARAMETERS:
+            lower:
+                Required Argument.
+                Specifies the lower bound value.
+                Type: ColumnExpression or str or int or float
+
+            upper:
+                Required Argument.
+                Specifies the upper bound value.
+                Type: ColumnExpression or str or int or float
+
+        RETURNS:
+            ColumnExpression
+
+        EXAMPLES:
+            # Load the data to run the example.
+            >>> load_example_data("dataframe", "sales")
+            >>> df = DataFrame("sales")
+            >>> print(df)
+                          Feb    Jan    Mar    Apr    datetime
+            accounts
+            Blue Inc     90.0   50.0   95.0  101.0  04/01/2017
+            Alpha Co    210.0  200.0  215.0  250.0  04/01/2017
+            Jones LLC   200.0  150.0  140.0  180.0  04/01/2017
+            Yellow Inc   90.0    NaN    NaN    NaN  04/01/2017
+            Red Inc     200.0  150.0  140.0    NaN  04/01/2017
+            Orange Inc  210.0    NaN    NaN  250.0  04/01/2017
+
+            # Example 1: Check if column 'Feb' is between 100 and 200.
+            >>> new_df = df[df.Feb.between(100, 200)]
+            >>> print(new_df)
+                         Feb  Jan  Mar    Apr    datetime
+            accounts
+            Jones LLC  200.0  150  140  180.0  04/01/2017
+            Red Inc    200.0  150  140    NaN  04/01/2017
+
+            # Example 2: Check if column 'datetime' is between '01-01-2017' and '30-01-2017'.
+            >>> new_df = df[df.datetime.between('01-01-2017', '30-01-2017')]
+            >>> print(new_df)
+                          Feb    Jan    Mar    Apr    datetime
+            accounts
+            Jones LLC   200.0  150.0  140.0  180.0  04/01/2017
+            Blue Inc     90.0   50.0   95.0  101.0  04/01/2017
+            Yellow Inc   90.0    NaN    NaN    NaN  04/01/2017
+            Red Inc     200.0  150.0  140.0    NaN  04/01/2017
+            Alpha Co    210.0  200.0  215.0  250.0  04/01/2017
+            Orange Inc  210.0    NaN    NaN  250.0  04/01/2017
+        """
+        return _SQLColumnExpression(self.expression.between(lower, upper))
+
+    def begin(self):
+        """
+        DESCRIPTION:
+            Retrieves the beginning date or timestamp from a PERIOD column.
+
+        PARAMETERS:
+            None.
+
+        RETURNS:
+            ColumnExpression.
+
+        RAISES:
+            TeradataMlException.
+
+        EXAMPLES:
+            # Load the data to run the example.
+            >>> load_example_data("teradataml", "Employee_roles")
+
+            # Create a DataFrame on 'employee_roles' table.
+            >>> df = DataFrame("employee_roles")
+
+            # Extract the starting date from the period column 'role_validity_period'
+            # and assign it to a new column.
+            >>> df = df.assign(start_date_col = df['role_validity_period'].begin())
+               EmployeeID EmployeeName Department   Salary      role_validity_period start_date_col
+                        1     John Doe         IT  100.000  ('20/01/01', '24/12/31')       20/01/01
+                        2   Jane Smith         DA  200.000  ('20/01/01', '99/12/31')       20/01/01
+                        3          Bob  Marketing  330.000  ('25/01/01', '99/12/31')       25/01/01
+                        3          Bob      Sales  300.000  ('24/01/01', '24/12/31')       24/01/01
+
+        """
+        _Validators._validate_period_column_type(self._type)
+        element_type = DATE if isinstance(self._type, PERIOD_DATE) else TIMESTAMP
+        return _SQLColumnExpression(literal_column(f"BEGIN({self.compile()})"), type = element_type)
+
+    def end(self):
+        """
+        DESCRIPTION:
+            Retrieves the ending date or timestamp from a PERIOD column.
+
+        PARAMETERS:
+            None.
+
+        RETURNS:
+            ColumnExpression.
+
+        RAISES:
+            TeradataMlException.
+
+        EXAMPLES:
+            # Load the data to run the example.
+            >>> load_example_data("teradataml", "Employee_roles")
+
+            # Create a DataFrame on 'employee_roles' table.
+            >>> df = DataFrame("employee_roles")
+
+            # Extract the ending date from the period column 'role_validity_period'
+            # and assign it to a new column.
+            >>> df = df.assign(end_date_col = df['role_validity_period'].end())
+               EmployeeID EmployeeName Department   Salary      role_validity_period end_date_col
+                        1     John Doe         IT  100.000  ('20/01/01', '24/12/31')     24/12/31
+                        2   Jane Smith         DA  200.000  ('20/01/01', '99/12/31')     99/12/31
+                        3          Bob  Marketing  330.000  ('25/01/01', '99/12/31')     99/12/31
+                        3          Bob      Sales  300.000  ('24/01/01', '24/12/31')     24/12/31
+
+        """
+        _Validators._validate_period_column_type(self._type)
+        element_type = DATE if isinstance(self._type, PERIOD_DATE) else TIMESTAMP
+        return _SQLColumnExpression(literal_column(f"END({self.compile()})"), type = element_type)