teradataml 20.0.0.1__py3-none-any.whl → 20.0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +306 -0
- teradataml/__init__.py +10 -3
- teradataml/_version.py +1 -1
- teradataml/analytics/__init__.py +3 -2
- teradataml/analytics/analytic_function_executor.py +299 -16
- teradataml/analytics/analytic_query_generator.py +92 -0
- teradataml/analytics/byom/__init__.py +3 -2
- teradataml/analytics/json_parser/metadata.py +13 -3
- teradataml/analytics/json_parser/utils.py +13 -6
- teradataml/analytics/meta_class.py +40 -1
- teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
- teradataml/analytics/sqle/__init__.py +11 -2
- teradataml/analytics/table_operator/__init__.py +4 -3
- teradataml/analytics/uaf/__init__.py +21 -2
- teradataml/analytics/utils.py +66 -1
- teradataml/analytics/valib.py +1 -1
- teradataml/automl/__init__.py +1502 -323
- teradataml/automl/custom_json_utils.py +139 -61
- teradataml/automl/data_preparation.py +247 -307
- teradataml/automl/data_transformation.py +32 -12
- teradataml/automl/feature_engineering.py +325 -86
- teradataml/automl/model_evaluation.py +44 -35
- teradataml/automl/model_training.py +122 -153
- teradataml/catalog/byom.py +8 -8
- teradataml/clients/pkce_client.py +1 -1
- teradataml/common/__init__.py +2 -1
- teradataml/common/constants.py +72 -0
- teradataml/common/deprecations.py +13 -7
- teradataml/common/garbagecollector.py +152 -120
- teradataml/common/messagecodes.py +11 -2
- teradataml/common/messages.py +4 -1
- teradataml/common/sqlbundle.py +26 -4
- teradataml/common/utils.py +225 -14
- teradataml/common/wrapper_utils.py +1 -1
- teradataml/context/context.py +82 -2
- teradataml/data/SQL_Fundamentals.pdf +0 -0
- teradataml/data/complaints_test_tokenized.csv +353 -0
- teradataml/data/complaints_tokens_model.csv +348 -0
- teradataml/data/covid_confirm_sd.csv +83 -0
- teradataml/data/dataframe_example.json +27 -1
- teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
- teradataml/data/docs/sqle/docs_17_20/Shap.py +203 -0
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
- teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
- teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
- teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
- teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
- teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
- teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
- teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
- teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
- teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
- teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
- teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
- teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
- teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
- teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
- teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
- teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
- teradataml/data/dwt2d_dataTable.csv +65 -0
- teradataml/data/dwt_dataTable.csv +8 -0
- teradataml/data/dwt_filterTable.csv +3 -0
- teradataml/data/finance_data4.csv +13 -0
- teradataml/data/grocery_transaction.csv +19 -0
- teradataml/data/idwt2d_dataTable.csv +5 -0
- teradataml/data/idwt_dataTable.csv +8 -0
- teradataml/data/idwt_filterTable.csv +3 -0
- teradataml/data/interval_data.csv +5 -0
- teradataml/data/jsons/paired_functions.json +14 -0
- teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
- teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
- teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
- teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
- teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
- teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
- teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
- teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
- teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
- teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
- teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
- teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
- teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
- teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
- teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
- teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
- teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
- teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
- teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
- teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
- teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
- teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
- teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
- teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
- teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
- teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
- teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
- teradataml/data/load_example_data.py +8 -2
- teradataml/data/medical_readings.csv +101 -0
- teradataml/data/naivebayestextclassifier_example.json +1 -1
- teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
- teradataml/data/patient_profile.csv +101 -0
- teradataml/data/peppers.png +0 -0
- teradataml/data/real_values.csv +14 -0
- teradataml/data/sax_example.json +8 -0
- teradataml/data/scripts/deploy_script.py +1 -1
- teradataml/data/scripts/lightgbm/dataset.template +157 -0
- teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
- teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
- teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +194 -160
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
- teradataml/data/scripts/sklearn/sklearn_function.template +34 -16
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +1 -1
- teradataml/data/scripts/sklearn/sklearn_score.py +12 -3
- teradataml/data/scripts/sklearn/sklearn_transform.py +162 -24
- teradataml/data/star_pivot.csv +8 -0
- teradataml/data/target_udt_data.csv +8 -0
- teradataml/data/templates/open_source_ml.json +3 -1
- teradataml/data/teradataml_example.json +20 -1
- teradataml/data/timestamp_data.csv +4 -0
- teradataml/data/titanic_dataset_unpivoted.csv +19 -0
- teradataml/data/uaf_example.json +55 -1
- teradataml/data/unpivot_example.json +15 -0
- teradataml/data/url_data.csv +9 -0
- teradataml/data/vectordistance_example.json +4 -0
- teradataml/data/windowdfft.csv +16 -0
- teradataml/dataframe/copy_to.py +1 -1
- teradataml/dataframe/data_transfer.py +5 -3
- teradataml/dataframe/dataframe.py +1002 -201
- teradataml/dataframe/fastload.py +3 -3
- teradataml/dataframe/functions.py +867 -0
- teradataml/dataframe/row.py +160 -0
- teradataml/dataframe/setop.py +2 -2
- teradataml/dataframe/sql.py +840 -33
- teradataml/dataframe/window.py +1 -1
- teradataml/dbutils/dbutils.py +878 -34
- teradataml/dbutils/filemgr.py +48 -1
- teradataml/geospatial/geodataframe.py +1 -1
- teradataml/geospatial/geodataframecolumn.py +1 -1
- teradataml/hyperparameter_tuner/optimizer.py +13 -13
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/__init__.py +1 -1
- teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
- teradataml/opensource/_lightgbm.py +950 -0
- teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
- teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
- teradataml/opensource/sklearn/__init__.py +0 -1
- teradataml/opensource/sklearn/_sklearn_wrapper.py +1019 -574
- teradataml/options/__init__.py +9 -23
- teradataml/options/configure.py +42 -4
- teradataml/options/display.py +2 -2
- teradataml/plot/axis.py +4 -4
- teradataml/scriptmgmt/UserEnv.py +13 -9
- teradataml/scriptmgmt/lls_utils.py +77 -23
- teradataml/store/__init__.py +13 -0
- teradataml/store/feature_store/__init__.py +0 -0
- teradataml/store/feature_store/constants.py +291 -0
- teradataml/store/feature_store/feature_store.py +2223 -0
- teradataml/store/feature_store/models.py +1505 -0
- teradataml/store/vector_store/__init__.py +1586 -0
- teradataml/table_operators/Script.py +2 -2
- teradataml/table_operators/TableOperator.py +106 -20
- teradataml/table_operators/query_generator.py +3 -0
- teradataml/table_operators/table_operator_query_generator.py +3 -1
- teradataml/table_operators/table_operator_util.py +102 -56
- teradataml/table_operators/templates/dataframe_register.template +69 -0
- teradataml/table_operators/templates/dataframe_udf.template +63 -0
- teradataml/telemetry_utils/__init__.py +0 -0
- teradataml/telemetry_utils/queryband.py +52 -0
- teradataml/utils/dtypes.py +4 -2
- teradataml/utils/validators.py +34 -2
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +311 -3
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +240 -157
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
teradataml/dataframe/sql.py
CHANGED
|
@@ -40,7 +40,7 @@ import sqlalchemy as sqlalc
|
|
|
40
40
|
|
|
41
41
|
import re
|
|
42
42
|
|
|
43
|
-
from teradatasqlalchemy.dialect import dialect as td_dialect, compiler as td_compiler
|
|
43
|
+
from teradatasqlalchemy.dialect import dialect as td_dialect, compiler as td_compiler, TeradataTypeCompiler as td_type_compiler
|
|
44
44
|
from teradatasqlalchemy import (INTEGER, SMALLINT, BIGINT, BYTEINT, DECIMAL, FLOAT, NUMBER)
|
|
45
45
|
from teradatasqlalchemy import (DATE, TIME, TIMESTAMP)
|
|
46
46
|
from teradatasqlalchemy import (BYTE, VARBYTE, BLOB)
|
|
@@ -52,7 +52,7 @@ from teradatasqlalchemy import (INTERVAL_DAY, INTERVAL_DAY_TO_HOUR, INTERVAL_DAY
|
|
|
52
52
|
INTERVAL_YEAR_TO_MONTH)
|
|
53
53
|
from teradatasqlalchemy import (PERIOD_DATE, PERIOD_TIME, PERIOD_TIMESTAMP)
|
|
54
54
|
from teradatasqlalchemy import XML, GEOMETRY
|
|
55
|
-
from
|
|
55
|
+
from teradataml.telemetry_utils.queryband import collect_queryband
|
|
56
56
|
import decimal
|
|
57
57
|
import datetime as dt
|
|
58
58
|
from teradataml.dataframe.window import Window
|
|
@@ -222,6 +222,10 @@ class _MetaExpression(object):
|
|
|
222
222
|
def __repr__(self):
|
|
223
223
|
return repr(self.__t)
|
|
224
224
|
|
|
225
|
+
def _get_table_expr(self):
|
|
226
|
+
return self.__t
|
|
227
|
+
|
|
228
|
+
|
|
225
229
|
class _PandasTableExpression(TableExpression):
|
|
226
230
|
|
|
227
231
|
def _assign(self, drop_columns, **kw):
|
|
@@ -484,6 +488,7 @@ class _SQLTableExpression(_PandasTableExpression):
|
|
|
484
488
|
columns = []
|
|
485
489
|
for c in kw['column_order']:
|
|
486
490
|
name = c.strip()
|
|
491
|
+
# Get case-insensitive column names from Table object.
|
|
487
492
|
col = table.c.get(name, table.c.get(name.lower(), table.c.get(name.upper())))
|
|
488
493
|
|
|
489
494
|
if col is None:
|
|
@@ -612,8 +617,9 @@ class _SQLTableExpression(_PandasTableExpression):
|
|
|
612
617
|
expression = display_number(c.expression)
|
|
613
618
|
elif isinstance(c.type, tuple(datetime_period_types)):
|
|
614
619
|
expression = cast_expr(c.expression, 30)
|
|
620
|
+
# Change the size as INTERVAL_DAY_TO_SECOND(4, 6) is failing.
|
|
615
621
|
elif isinstance(c.type, tuple(interval_types)):
|
|
616
|
-
expression = cast_expr(c.expression,
|
|
622
|
+
expression = cast_expr(c.expression, 25)
|
|
617
623
|
elif isinstance(c.type, GEOMETRY):
|
|
618
624
|
expression = cast_expr(c.expression, display.geometry_column_length) if \
|
|
619
625
|
display.geometry_column_length is not None else c.expression.label(c.name)
|
|
@@ -5461,14 +5467,19 @@ class _SQLColumnExpression(_LogicalColumnExpression,
|
|
|
5461
5467
|
expression = literal_column(expression)
|
|
5462
5468
|
self.kw = kw
|
|
5463
5469
|
self.expression = expression
|
|
5464
|
-
self.type = kw.get("type", expression.type)
|
|
5470
|
+
self.type = kw.get("type", expression.type if expression is not None else kw.get("udf_type"))
|
|
5465
5471
|
# Initial ColumnExpression has only one dataframe and hence
|
|
5466
5472
|
# __has_multiple_dataframes = False.
|
|
5467
5473
|
# eg: df1.col1, df2.col2
|
|
5468
5474
|
self.__has_multiple_dataframes = False
|
|
5469
5475
|
self.__names = []
|
|
5470
|
-
self.
|
|
5471
|
-
|
|
5476
|
+
self._udf = kw.get("udf", None)
|
|
5477
|
+
self._udf_args = kw.get("udf_args", None)
|
|
5478
|
+
self._env_name = kw.get("env_name", None)
|
|
5479
|
+
self._delimiter = kw.get("delimiter", None)
|
|
5480
|
+
self._quotechar = kw.get("quotechar", None)
|
|
5481
|
+
self._udf_script = kw.get("udf_script", None)
|
|
5482
|
+
self.alias_name = self.compile() if (self._udf or self._udf_script) is None else None
|
|
5472
5483
|
|
|
5473
5484
|
@property
|
|
5474
5485
|
def expression(self):
|
|
@@ -5648,23 +5659,23 @@ class _SQLColumnExpression(_LogicalColumnExpression,
|
|
|
5648
5659
|
"""
|
|
5649
5660
|
Calls the compile method of the underlying sqlalchemy.Column
|
|
5650
5661
|
"""
|
|
5651
|
-
|
|
5652
|
-
|
|
5653
|
-
|
|
5654
|
-
|
|
5655
|
-
|
|
5656
|
-
|
|
5657
|
-
|
|
5658
|
-
|
|
5659
|
-
|
|
5660
|
-
return str(self.expression.compile(*args, **
|
|
5662
|
+
kw_new = dict({'dialect': td_dialect(),
|
|
5663
|
+
'compile_kwargs':
|
|
5664
|
+
{
|
|
5665
|
+
'include_table': False,
|
|
5666
|
+
'literal_binds': True
|
|
5667
|
+
}
|
|
5668
|
+
})
|
|
5669
|
+
if len(kw) != 0:
|
|
5670
|
+
kw_new.update(kw)
|
|
5671
|
+
return str(self.expression.compile(*args, **kw_new))
|
|
5661
5672
|
|
|
5662
5673
|
def compile_label(self, label):
|
|
5663
5674
|
"""
|
|
5664
5675
|
DESCRIPTION:
|
|
5665
5676
|
Compiles expression with label, by calling underlying sqlalchemy methods.
|
|
5666
5677
|
|
|
5667
|
-
|
|
5678
|
+
PARAMETERS:
|
|
5668
5679
|
label:
|
|
5669
5680
|
Required Argument.
|
|
5670
5681
|
Specifies the label to be used to alias the compiled expression.
|
|
@@ -5694,7 +5705,7 @@ class _SQLColumnExpression(_LogicalColumnExpression,
|
|
|
5694
5705
|
with the "value". Use this function either to replace or remove
|
|
5695
5706
|
NA from Column.
|
|
5696
5707
|
|
|
5697
|
-
|
|
5708
|
+
PARAMETERS:
|
|
5698
5709
|
value:
|
|
5699
5710
|
Required Argument.
|
|
5700
5711
|
Specifies the replacement value for null values in the column.
|
|
@@ -5833,7 +5844,7 @@ class _SQLColumnExpression(_LogicalColumnExpression,
|
|
|
5833
5844
|
return _SQLColumnExpression(func.concat(*columns_))
|
|
5834
5845
|
|
|
5835
5846
|
@collect_queryband(queryband="DFC_cast")
|
|
5836
|
-
def cast(self, type_ = None):
|
|
5847
|
+
def cast(self, type_ = None, format = None, timezone = None):
|
|
5837
5848
|
"""
|
|
5838
5849
|
DESCRIPTION:
|
|
5839
5850
|
Apply the CAST SQL function to the column with the type specified.
|
|
@@ -5849,6 +5860,32 @@ class _SQLColumnExpression(_LogicalColumnExpression,
|
|
|
5849
5860
|
Default value: None
|
|
5850
5861
|
Types: teradatasqlalchemy type or object of teradatasqlalchemy type
|
|
5851
5862
|
|
|
5863
|
+
format:
|
|
5864
|
+
Optional Argument.
|
|
5865
|
+
Specifies a variable length string containing formatting characters
|
|
5866
|
+
that define the display format for the data type.
|
|
5867
|
+
Formats can be specified for columns that have character, numeric, byte,
|
|
5868
|
+
DateTime, Period or UDT data types.
|
|
5869
|
+
Note:
|
|
5870
|
+
* Teradata supports different formats. Look at 'Formats' section in
|
|
5871
|
+
"SQL-Data-Types-and-Literals" in Vantage documentation for additional
|
|
5872
|
+
details.
|
|
5873
|
+
Default value: None
|
|
5874
|
+
Types: str
|
|
5875
|
+
|
|
5876
|
+
timezone:
|
|
5877
|
+
Optional Argument.
|
|
5878
|
+
Specifies the timezone string.
|
|
5879
|
+
Check "SQL-Date-and-Time-Functions-and-Expressions" in
|
|
5880
|
+
Vantage documentation for supported timezones.
|
|
5881
|
+
Type: ColumnExpression or str.
|
|
5882
|
+
|
|
5883
|
+
RETURNS:
|
|
5884
|
+
ColumnExpression
|
|
5885
|
+
|
|
5886
|
+
RAISES:
|
|
5887
|
+
TeradataMlException
|
|
5888
|
+
|
|
5852
5889
|
EXAMPLES:
|
|
5853
5890
|
>>> load_example_data("dataframe","admissions_train")
|
|
5854
5891
|
>>> df = DataFrame('admissions_train')
|
|
@@ -5873,8 +5910,24 @@ class _SQLColumnExpression(_LogicalColumnExpression,
|
|
|
5873
5910
|
programming str
|
|
5874
5911
|
admitted int
|
|
5875
5912
|
|
|
5876
|
-
>>>
|
|
5877
|
-
>>>
|
|
5913
|
+
>>> dataframe_dict = {"id": [100, 200,300],
|
|
5914
|
+
>>> "timestamp_col": ['1000-01-10 23:00:12-02:00', '2015-01-08 13:00:00+12:00', '2014-12-10 10:00:35-08:00'],
|
|
5915
|
+
>>> "timezone_col": ["GMT", "America Pacific", "GMT+10"]}
|
|
5916
|
+
>>> pandas_df = pd.DataFrame(dataframe_dict)
|
|
5917
|
+
>>> copy_to_sql(pandas_df, table_name = 'new_table', if_exists = 'replace')
|
|
5918
|
+
>>> df1 = DataFrame("new_table")
|
|
5919
|
+
>>> df1
|
|
5920
|
+
id timestamp_col timezone_col
|
|
5921
|
+
300 2014-12-10 10:00:35-08:00 GMT+10
|
|
5922
|
+
200 2015-01-08 13:00:00+12:00 America Pacific
|
|
5923
|
+
100 1000-01-10 23:00:12-02:00 GMT
|
|
5924
|
+
>>> df1.dtypes
|
|
5925
|
+
id int
|
|
5926
|
+
timestamp_col str
|
|
5927
|
+
timezone_col str
|
|
5928
|
+
|
|
5929
|
+
# Example 1: Let's try creating a new DataFrame casting 'id' column (of type INTEGER) to VARCHAR(5),
|
|
5930
|
+
# an object of a teradatasqlalchemy type.
|
|
5878
5931
|
>>> from teradatasqlalchemy import VARCHAR
|
|
5879
5932
|
>>> new_df = df.assign(char_id = df.id.cast(type_=VARCHAR(5)))
|
|
5880
5933
|
>>> new_df
|
|
@@ -5899,8 +5952,8 @@ class _SQLColumnExpression(_LogicalColumnExpression,
|
|
|
5899
5952
|
admitted int
|
|
5900
5953
|
char_id str
|
|
5901
5954
|
|
|
5902
|
-
|
|
5903
|
-
|
|
5955
|
+
# Example 2: Now let's try creating a new DataFrame casting 'id' column (of type INTEGER) to VARCHAR,
|
|
5956
|
+
# a teradatasqlalchemy type.
|
|
5904
5957
|
>>> new_df_2 = df.assign(char_id = df.id.cast(type_=VARCHAR))
|
|
5905
5958
|
>>> new_df_2
|
|
5906
5959
|
masters gpa stats programming admitted char_id
|
|
@@ -5924,25 +5977,65 @@ class _SQLColumnExpression(_LogicalColumnExpression,
|
|
|
5924
5977
|
admitted int
|
|
5925
5978
|
char_id str
|
|
5926
5979
|
|
|
5927
|
-
|
|
5928
|
-
|
|
5980
|
+
# Example 3: Let's try filtering some data with a match on a column cast to another type,
|
|
5981
|
+
# an object of a teradatasqlalchemy type.
|
|
5929
5982
|
>>> df[df.id.cast(VARCHAR(5)) == '1']
|
|
5930
5983
|
masters gpa stats programming admitted
|
|
5931
5984
|
id
|
|
5932
5985
|
1 yes 3.95 Beginner Beginner 0
|
|
5933
5986
|
|
|
5934
|
-
|
|
5987
|
+
# Example 4: Now let's try the same, this time using a teradatasqlalchemy type.
|
|
5935
5988
|
>>> df[df.id.cast(VARCHAR) == '1']
|
|
5936
5989
|
masters gpa stats programming admitted
|
|
5937
5990
|
id
|
|
5938
5991
|
1 yes 3.95 Beginner Beginner 0
|
|
5939
5992
|
|
|
5940
|
-
|
|
5941
|
-
|
|
5993
|
+
# Example 5: Let's try creating a new DataFrame casting 'timestamp_col' column (of type VARCHAR) to TIMESTAMP,
|
|
5994
|
+
# using format.
|
|
5995
|
+
>>> new_df1 = df1.assign(new_col = df1.timestamp_col.cast(TIMESTAMP, format='Y4-MM-DDBHH:MI:SSBZ'))
|
|
5996
|
+
id timestamp_col timezone_col new_col
|
|
5997
|
+
300 2014-12-10 10:00:35-08:00 GMT+10 2014-12-10 18:00:35
|
|
5998
|
+
200 2015-01-08 13:00:00+12:00 America Pacific 2015-01-08 01:00:00
|
|
5999
|
+
100 1000-01-10 23:00:12-02:00 GMT 1000-01-11 01:00:12
|
|
6000
|
+
>>> new_df1.tdtypes
|
|
6001
|
+
id int
|
|
6002
|
+
timestamp_col str
|
|
6003
|
+
timezone_col str
|
|
6004
|
+
new_col datetime.datetime
|
|
6005
|
+
|
|
6006
|
+
# Example 6: Let's try creating a new DataFrame casting 'id' column (of type INTEGER) to VARCHAR,
|
|
6007
|
+
# using format.
|
|
6008
|
+
>>> new_df2 = df1.assign(new_col = df1.id.cast(VARCHAR, format='zzz.zz'))
|
|
6009
|
+
id timestamp_col timezone_col new_col
|
|
6010
|
+
300 2014-12-10 10:00:35-08:00 GMT+10 300.00
|
|
6011
|
+
200 2015-01-08 13:00:00+12:00 America Pacific 200.00
|
|
6012
|
+
100 1000-01-10 23:00:12-02:00 GMT 100.00
|
|
6013
|
+
>>> new_df2.dtypes
|
|
6014
|
+
id int
|
|
6015
|
+
timestamp_col str
|
|
6016
|
+
timezone_col str
|
|
6017
|
+
new_col str
|
|
6018
|
+
|
|
6019
|
+
# Example 7: Let's try creating a new DataFrame casting 'timestamp_with_timezone' column (of type TIMESTAMP) to
|
|
6020
|
+
# TIMESTAMP WITH TIMEZONE, with offset 'GMT+10'.
|
|
6021
|
+
>>> new_df3 = new_df1.assign(timestamp_with_timezone = new_df1.new_col.cast(TIMESTAMP(timezone=True), timezone='GMT+10'))
|
|
6022
|
+
id timestamp_col timezone_col new_col timestamp_with_timezone
|
|
6023
|
+
300 2014-12-10 10:00:35-08:00 GMT+10 2014-12-10 18:00:35 2014-12-11 04:00:35.000000+10:00
|
|
6024
|
+
200 2015-01-08 13:00:00+12:00 America Pacific 2015-01-08 01:00:00 2015-01-08 11:00:00.000000+10:00
|
|
6025
|
+
100 1000-01-10 23:00:12-02:00 GMT 1000-01-11 01:00:12 1000-01-11 11:00:12.000000+10:00
|
|
6026
|
+
>>> new_df3.dtypes
|
|
6027
|
+
id int
|
|
6028
|
+
timestamp_col str
|
|
6029
|
+
timezone_col str
|
|
6030
|
+
new_col datetime.datetime
|
|
6031
|
+
timestamp_with_timezone datetime.datetime
|
|
6032
|
+
"""
|
|
6033
|
+
# Validating Arguments
|
|
6034
|
+
arg_type_matrix = []
|
|
6035
|
+
arg_type_matrix.append(["format", format , True, (str), True])
|
|
6036
|
+
arg_type_matrix.append(["timezone", timezone, True, (str, ColumnExpression, int, float), True])
|
|
6037
|
+
_Validators._validate_function_arguments(arg_type_matrix)
|
|
5942
6038
|
|
|
5943
|
-
RAISES:
|
|
5944
|
-
TeradataMlException
|
|
5945
|
-
"""
|
|
5946
6039
|
# If type_ is None or not specified, raise an Exception
|
|
5947
6040
|
if type_ is None:
|
|
5948
6041
|
raise TeradataMlException(Messages.get_message(MessageCodes.MISSING_ARGS, 'type_'),
|
|
@@ -5953,8 +6046,26 @@ class _SQLColumnExpression(_LogicalColumnExpression,
|
|
|
5953
6046
|
raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE, 'type_',
|
|
5954
6047
|
'a valid teradatasqlalchemy type'),
|
|
5955
6048
|
MessageCodes.UNSUPPORTED_DATATYPE)
|
|
5956
|
-
|
|
5957
6049
|
expression = func.cast(self.expression, type_=type_).label(self.name)
|
|
6050
|
+
if format or timezone:
|
|
6051
|
+
# Casting to VARCHAR or CHAR with format require this type of query
|
|
6052
|
+
# CAST((CAST (F1 AS FORMAT 'format_str')) AS [CHAR|VARCHAR])
|
|
6053
|
+
if isinstance(type_, (VARCHAR, CHAR)) or (isinstance(type_, type) and issubclass(type_, (VARCHAR, CHAR))):
|
|
6054
|
+
expression = func.cast(literal_column("""CAST({} AS FORMAT '{}')""".format(self.compile(), format)), type_=type_)
|
|
6055
|
+
else:
|
|
6056
|
+
# Compile _TDType to string
|
|
6057
|
+
type_compiler = td_type_compiler(td_dialect)
|
|
6058
|
+
type_expression = type_compiler.process(type_) if not isinstance(type_, type) else type_compiler.process(type_())
|
|
6059
|
+
# Create a query with format and timezone string
|
|
6060
|
+
# CAST(TIMESTAMP "column_name" AS "_TDType" FORMAT "format" AT TIMEZONE "timezone_str")
|
|
6061
|
+
format = " FORMAT '{}'".format(format) if format else ""
|
|
6062
|
+
if timezone and isinstance(timezone, _SQLColumnExpression):
|
|
6063
|
+
timezone = _SQLColumnExpression(literal_column(f' AT TIME ZONE {timezone.compile()}')).compile()
|
|
6064
|
+
elif timezone:
|
|
6065
|
+
timezone = _SQLColumnExpression(literal_column(_SQLColumnExpression._timezone_string(timezone))).compile()
|
|
6066
|
+
else:
|
|
6067
|
+
timezone = ""
|
|
6068
|
+
expression = literal_column("""CAST({} AS {}{}{})""".format(self.compile(), type_expression, timezone, format), type_=type_)
|
|
5958
6069
|
return _SQLColumnExpression(expression)
|
|
5959
6070
|
|
|
5960
6071
|
def __hash__(self):
|
|
@@ -6081,12 +6192,19 @@ class _SQLColumnExpression(_LogicalColumnExpression,
|
|
|
6081
6192
|
# If user has not passed any type, then set it to
|
|
6082
6193
|
# NullType().
|
|
6083
6194
|
type = sqlalc.sql.sqltypes.NullType()
|
|
6084
|
-
|
|
6195
|
+
# Boolean flag to treat function as an instance method.
|
|
6196
|
+
function_has_col_caller = column_function
|
|
6085
6197
|
# Generate the function syntax based on whether the
|
|
6086
6198
|
# function is column function or not.
|
|
6087
6199
|
if column_function:
|
|
6088
6200
|
name = quoted_name("{}.{}".format(col_name, func_name),
|
|
6089
6201
|
False)
|
|
6202
|
+
# Dynamic function gets called on teradataml._SQLColumnExpression type object.
|
|
6203
|
+
# 'expression' attribute of _SQLColumnExpression object holds
|
|
6204
|
+
# corresponding SQLAlchemy.Expression type object.
|
|
6205
|
+
# SQLAlchemy.Expression type object should be available from FunctionElement.
|
|
6206
|
+
# This 'func_caller' attribute points to that Expression object.
|
|
6207
|
+
func_caller = self.expression
|
|
6090
6208
|
else:
|
|
6091
6209
|
name = quoted_name(func_name, False)
|
|
6092
6210
|
|
|
@@ -10172,3 +10290,692 @@ class _SQLColumnExpression(_LogicalColumnExpression,
|
|
|
10172
10290
|
|
|
10173
10291
|
self.alias_name = name
|
|
10174
10292
|
return self
|
|
10293
|
+
|
|
10294
|
+
@staticmethod
|
|
10295
|
+
def _timezone_string(value):
|
|
10296
|
+
"""
|
|
10297
|
+
DESCRIPTION:
|
|
10298
|
+
Function to return timezone string in correct format.
|
|
10299
|
+
|
|
10300
|
+
PARAMETERS:
|
|
10301
|
+
value:
|
|
10302
|
+
Required Argument.
|
|
10303
|
+
Specifies timezone string.
|
|
10304
|
+
Types: str, int , float
|
|
10305
|
+
|
|
10306
|
+
RETURNS:
|
|
10307
|
+
bool
|
|
10308
|
+
"""
|
|
10309
|
+
if isinstance(value, (float, int)):
|
|
10310
|
+
return " AT TIME ZONE {}".format(value)
|
|
10311
|
+
if value.upper() not in ['LOCAL']:
|
|
10312
|
+
return " AT TIME ZONE '{}'".format(value)
|
|
10313
|
+
return " AT {}".format(value)
|
|
10314
|
+
|
|
10315
|
+
def to_timestamp(self, format=None, type_=TIMESTAMP, timezone=None):
    """
    DESCRIPTION:
        Converts string or integer to a TIMESTAMP data type or TIMESTAMP WITH
        TIME ZONE data type.
        Note:
            * POSIX epoch conversion is implicit in the "to_timestamp" when column
              is integer type. POSIX epoch is the number of seconds that have elapsed
              since midnight Coordinated Universal Time (UTC) of January 1, 1970.

    PARAMETERS:
        format:
            Specifies the format of string column.
            Argument is not required when column is integer type, Otherwise Required.
            For valid 'format' values, see documentation on
            "to_date" or "help(df.col_name.to_date)".
            Type: ColumnExpression or str

        type_:
            Optional Argument.
            Specifies a TIMESTAMP type or an object of a
            TIMESTAMP type that the column needs to be cast to.
            Default value: TIMESTAMP
            Permitted Values: TIMESTAMP data type
            Types: teradatasqlalchemy type or object of teradatasqlalchemy type

        timezone:
            Optional Argument.
            Specifies the timezone string or a numeric UTC offset.
            For valid timezone strings, user should check Vantage documentation.
            Type: ColumnExpression, str, int or float.

    RETURNS:
        ColumnExpression

    RAISES:
        TeradataMlException - if "type_" is not a valid teradatasqlalchemy type.

    EXAMPLES:
        # Load the data to run the example.
        >>> load_example_data("teradataml", "timestamp_data")

        # Create a DataFrame on 'timestamp_data' table.
        >>> df = DataFrame("timestamp_data")

        # Example 1: Convert Epoch seconds to timestamp.
        >>> df.select(['id','timestamp_col1']).assign(col = df.timestamp_col1.to_timestamp())
           id  timestamp_col1                         col
            2     45678910234  3417-07-05 02:10:34.000000
            1          878986  1970-01-11 04:09:46.000000
            0          123456  1970-01-02 10:17:36.000000

        # Example 2: Convert timestamp string to timestamp with timezone in
        #            format mentioned in column "format_col".
        >>> df.select(['id', 'timestamp_col', 'format_col']).assign(col = df.timestamp_col.to_timestamp(df.format_col, TIMESTAMP(timezone=True)))
           id                timestamp_col                          format_col                               col
            2  2015-01-08 00:00:12.2+10:00  YYYY-MM-DD HH24:MI:SS.FF6 TZH:TZM  2015-01-08 00:00:12.200000+10:00
            1             2015-01-08 13:00                   YYYY-MM-DD HH24:MI  2015-01-08 13:00:00.000000+00:00
            0        2015-01-08 00:00:12.2          YYYY-MM-DD HH24:MI:SS.FF6  2015-01-08 00:00:12.200000+00:00

        # Example 3: Convert Epoch seconds to timestamp with timezone in 'GMT+2' location.
        >>> df.select(['id', 'timestamp_col1', 'format_col']).assign(col = df.timestamp_col1.to_timestamp(df.format_col, TIMESTAMP(timezone=True), 'GMT+2'))
           id  timestamp_col1                          format_col                               col
            2     45678910234  YYYY-MM-DD HH24:MI:SS.FF6 TZH:TZM  3417-07-05 04:10:34.000000+02:00
            1          878986                   YYYY-MM-DD HH24:MI  1970-01-11 06:09:46.000000+02:00
            0          123456          YYYY-MM-DD HH24:MI:SS.FF6  1970-01-02 12:17:36.000000+02:00
    """
    # Validating Arguments
    arg_type_matrix = []
    arg_type_matrix.append(["format", format, True, (str, ColumnExpression), True])
    arg_type_matrix.append(["timezone", timezone, True, (str, ColumnExpression, int, float), True])
    _Validators._validate_function_arguments(arg_type_matrix)

    if not UtilFuncs._is_valid_td_type(type_):
        raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE, 'type_',
                                                       'a valid teradatasqlalchemy type'),
                                  MessageCodes.UNSUPPORTED_DATATYPE)

    _format = format.expression if isinstance(format, _SQLColumnExpression) else format
    _params = [self.expression, _format]
    # format is not required when column is of below integer types;
    # the value is interpreted as POSIX epoch seconds instead.
    if isinstance(self._type, (BYTEINT, SMALLINT, INTEGER, BIGINT)):
        _params.pop()
    # Use to_timestamp_tz when below 3 conditions are true.
    # Resultant query will be Example:
    # TO_TIMESTAMP('2015-10-08 00:00:12.2') or TO_TIMESTAMP_TZ('2015-10-08 00:00:12.2+03:00') based on type_
    _fun = getattr(func, "to_timestamp_tz") if isinstance(type_, TIMESTAMP) and type_.timezone and len(_params) == 2 \
        else getattr(func, "to_timestamp")
    # Compare against None explicitly so that a numeric UTC offset of 0
    # (a valid timezone per the validator above) is not silently ignored.
    if timezone is None:
        return _SQLColumnExpression(_fun(*_params), type=type_)

    # If user uses timezone generate query with time zone.
    # Resultant query will be Example:
    # TO_TIMESTAMP('2015-10-08 00:00:12.2') at time zone 'America Alaska',
    # TO_TIMESTAMP_TZ('2015-10-08 00:00:12.2+03:00') at time zone 'America Alaska'.
    if isinstance(timezone, _SQLColumnExpression):
        _timezone_expr = _SQLColumnExpression(literal_column(f' AT TIME ZONE {timezone.compile()}')).compile()
    else:
        _timezone_expr = _SQLColumnExpression(literal_column(_SQLColumnExpression._timezone_string(timezone))).compile()
    return _SQLColumnExpression(_SQLColumnExpression(_fun(*_params)).compile() + _timezone_expr, type=type_)
|
|
10426
|
+
def extract(self, value, timezone=None):
    """
    DESCRIPTION:
        Extracts a single specified field from any DateTime, Interval or timestamp value,
        converting it to an exact numeric value.

    PARAMETERS:
        value:
            Required Argument.
            Specifies the field which needs to be extracted.
            Permitted Values: YEAR, MONTH, DAY, HOUR, MINUTE, SECOND, TIMEZONE_HOUR, TIMEZONE_MINUTE
            Note:
                * Permitted Values are case insensitive.
            Type: str

        timezone:
            Optional Argument.
            Specifies the timezone string or a numeric UTC offset.
            For valid timezone strings, user should check Vantage documentation.
            Type: ColumnExpression, str, int or float.

    RETURNS:
        ColumnExpression

    EXAMPLES:
        # Load the data to run the example.
        >>> load_example_data("uaf", "Traindata")

        # Create a DataFrame on 'Traindata' table.
        >>> temp_df = DataFrame("Traindata")
        >>> df = temp_df.select(["seq_no", "schedule_date", "arrivalTime"])

        # Example 1: Extract year from column 'schedule_date'.
        >>> df.assign(col = df.schedule_date.extract('YEAR'))
               schedule_date          arrivalTime   col
        seq_no
        26          16/03/26  2016-03-26 12:33:05  2016
        24          16/03/26  2016-03-26 12:25:06  2016

        # Example 2: Extract hour from column 'arrivalTime'.
        >>> df.assign(col = df.arrivalTime.extract('HOUR'))
               schedule_date          arrivalTime  col
        seq_no
        26          16/03/26  2016-03-26 12:33:05   12
        3           16/03/26  2016-03-26 10:52:05   10

        # Example 3: Extract hour from column 'arrivalTime' with offset '-11:00'.
        >>> df.assign(col = df.arrivalTime.extract('HOUR', '-11:00'))
               schedule_date          arrivalTime  col
        seq_no
        26          16/03/26  2016-03-26 12:33:05    1
        3           16/03/26  2016-03-26 10:52:05   23

        # Example 4: Extract hour from column 'arrivalTime' with offset 10.
        >>> df.assign(col = df.arrivalTime.extract('HOUR', 10))
               schedule_date          arrivalTime  col
        seq_no
        26          16/03/26  2016-03-26 12:33:05   22
        3           16/03/26  2016-03-26 10:52:05   20
    """
    # Validating Arguments
    arg_type_matrix = []
    # "value" is a required argument (optional flag is False), matching
    # the documented contract above.
    arg_type_matrix.append(["value", value, False, (str), True])
    arg_type_matrix.append(["timezone", timezone, True, (str, ColumnExpression, int, float), True])
    _Validators._validate_function_arguments(arg_type_matrix)

    # If user doesn't provide timezone simply use extract functionality.
    # Compare against None explicitly so that a numeric UTC offset of 0
    # (a valid timezone per the validator above) is not silently ignored.
    if timezone is None:
        return _SQLColumnExpression(func.extract(value, self.expression))

    # If user uses timezone generate query with time zone.
    if isinstance(timezone, _SQLColumnExpression):
        _timezone_expr = _SQLColumnExpression(literal_column(f' AT TIME ZONE {timezone.compile()}')).compile()
    else:
        _timezone_expr = _SQLColumnExpression(literal_column(_SQLColumnExpression._timezone_string(timezone))).compile()
    return _SQLColumnExpression(func.extract(value, literal_column('({}{})'.format(self.compile(), _timezone_expr))))
|
|
10550
|
+
def to_interval(self, value=None, type_=INTERVAL_DAY_TO_SECOND):
    """
    DESCRIPTION:
        Converts a numeric value or string value into an INTERVAL_DAY_TO_SECOND or INTERVAL_YEAR_TO_MONTH value.

    PARAMETERS:
        value:
            Optional, when column type is VARCHAR or CHAR, otherwise required.
            Specifies the unit of value for numeric value.
            when type_ is INTERVAL_DAY_TO_SECOND permitted values:
                * DAY, HOUR, MINUTE, SECOND
            when type_ is INTERVAL_YEAR_TO_MONTH permitted values:
                * YEAR, MONTH
            Note:
                * Permitted Values are case insensitive.
            Type: str or ColumnExpression

        type_:
            Optional Argument.
            Specifies a teradatasqlalchemy type or an object of a teradatasqlalchemy type
            that the column needs to be cast to.
            Default value: INTERVAL_DAY_TO_SECOND
            Permitted Values: INTERVAL_DAY_TO_SECOND or INTERVAL_YEAR_TO_MONTH type.
            Types: teradatasqlalchemy type or object of teradatasqlalchemy type

    RETURNS:
        ColumnExpression

    RAISES:
        TeradataMlException - if "type_" is not a valid teradatasqlalchemy type.

    EXAMPLES:
        # Load the data to run the example.
        >>> load_example_data("teradataml", "interval_data")

        # Create a DataFrame on 'interval_data' table.
        >>> df = DataFrame("interval_data")

        # Example 1: Convert "int_col" column to INTERVAL_DAY_TO_SECOND with value
        #            provided in "value_col".
        >>> df.assign(col = df.int_col.to_interval(df.value_col))
           id  int_col value_col value_col1         str_col1 str_col2                 col
            2      657    MINUTE      MONTH            PT73H    -P14M    0 10:57:00.000000
            1      240      HOUR       YEAR   P100DT4H23M59S  P100Y4M   10 00:00:00.000000

        # Example 2: Convert int_col to INTERVAL_YEAR_TO_MONTH when value = 'MONTH'.
        >>> df.assign(col = df.int_col.to_interval('MONTH', INTERVAL_YEAR_TO_MONTH))
           id  int_col value_col value_col1         str_col1 str_col2     col
            2      657    MINUTE      MONTH            PT73H    -P14M   54-09
            1      240      HOUR       YEAR   P100DT4H23M59S  P100Y4M   20-00

        # Example 3: Convert string column "str_col1" to INTERVAL_DAY_TO_SECOND.
        >>> df.assign(col = df.str_col1.to_interval())
           id  int_col value_col value_col1         str_col1 str_col2                  col
            2      657    MINUTE      MONTH            PT73H    -P14M     3 01:00:00.000000
            1      240      HOUR       YEAR   P100DT4H23M59S  P100Y4M   100 04:23:59.000000

        # Example 4: Convert string column "str_col2" to INTERVAL_YEAR_TO_MONTH.
        >>> df.assign(col = df.str_col2.to_interval(type_=INTERVAL_YEAR_TO_MONTH))
           id  int_col value_col value_col1         str_col1 str_col2      col
            2      657    MINUTE      MONTH            PT73H    -P14M    -1-02
            1      240      HOUR       YEAR   P100DT4H23M59S  P100Y4M   100-04
    """
    # Validating Arguments
    arg_type_matrix = []
    arg_type_matrix.append(["value", value, True, (str, ColumnExpression), True])
    _Validators._validate_function_arguments(arg_type_matrix)

    if not UtilFuncs._is_valid_td_type(type_):
        raise TeradataMlException(Messages.get_message(MessageCodes.UNSUPPORTED_DATATYPE, 'type_',
                                                       'a valid teradatasqlalchemy type'),
                                  MessageCodes.UNSUPPORTED_DATATYPE)

    # "type_" may be passed either as an instance or as the class itself;
    # both forms select the day-to-second family of functions.
    _is_day_to_second = isinstance(type_, INTERVAL_DAY_TO_SECOND) or \
                        (isinstance(type_, type) and issubclass(type_, INTERVAL_DAY_TO_SECOND))

    # When column type is string, use either to_dsinterval or to_yminterval function based on "type_".
    if isinstance(self._type, (VARCHAR, CHAR)):
        _fun = getattr(func, "to_dsinterval") if _is_day_to_second else getattr(func, "to_yminterval")
        return _SQLColumnExpression(_fun(self.expression), type=type_)

    # When column type is integer or float type, use either numtodsinterval or numtoyminterval
    # function based on "type_".
    _fun = getattr(func, "numtodsinterval") if _is_day_to_second else getattr(func, "numtoyminterval")
    value = value.expression if isinstance(value, _SQLColumnExpression) else value
    return _SQLColumnExpression(_fun(self.expression, value), type=type_)
|
|
10659
|
+
def parse_url(self, url_part):
    """
    DESCRIPTION:
        Extracts a specific part from the URL.

    PARAMETERS:
        url_part:
            Required Argument.
            Specifies which part to be extracted.
            Permitted Values: HOST, PATH, QUERY, REF, PROTOCOL, FILE, AUTHORITY, USERINFO
            Type: str or ColumnExpression

    Returns:
        ColumnExpression

    EXAMPLES:
        # Load the data to run the example.
        >>> load_example_data("teradataml", "url_data")

        # Create a DataFrame on 'url_data' table.
        >>> df = DataFrame("url_data")

        # Example 1: Extract components from column 'urls' using column 'part'
        >>> df.assign(col = df.urls.parse_url(df.part))
                                                                   urls       part                col
        id
        3                                      https://www.facebook.com       HOST   www.facebook.com
        6             smtp://user:password@smtp.example.com:21/file.txt   USERINFO      user:password
        2   https://example.net/path4/path5/path6?query4=value4#fragment        REF           fragment
        7                                        https://www.google.com   PROTOCOL              https
        >>>
    """
    # Validating Arguments
    arg_type_matrix = [["url_part", url_part, False, (str, ColumnExpression), True]]
    _Validators._validate_function_arguments(arg_type_matrix)

    # Regex pattern used to extract 'url_part' is '^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'.
    # teradataml does not support regex grouping hence in some cases first used 'regex_replace' and
    # then 'regex_substr' or vice-versa.
    _extractors = {
        'HOST': _SQLColumnExpression(
            func.regexp_replace(func.regexp_substr(self.expression, '//([^/?#]*)'), '(//[^/?#]+@)|(//)|(:\d+)', ''),
            type=VARCHAR()),
        'PATH': _SQLColumnExpression(
            func.regexp_substr(func.regexp_replace(self.expression, '^(([^:/?#]+):)?(//([^/?#]*))?', ''),
                               '([^?#]*)'), type=VARCHAR()),
        'QUERY': _SQLColumnExpression(
            func.ltrim(func.regexp_substr(
                func.regexp_replace(self.expression, '^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)', ''),
                '\?([^#]*)'), '?'), type=VARCHAR()),
        'REF': _SQLColumnExpression(
            func.ltrim(func.regexp_substr(
                func.regexp_replace(self.expression, '^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?', ''),
                '(#(.*))'), '#'), type=VARCHAR()),
        'PROTOCOL': _SQLColumnExpression(
            func.rtrim(func.regexp_substr(self.expression, '^(([^:/?#]+):)'), ':'),
            type=VARCHAR()),
        'FILE': _SQLColumnExpression(
            func.regexp_substr(func.regexp_replace(self.expression, '^(([^:/?#]+):)?(//([^/?#]*))?', ''),
                               '([^?#]*)(\?([^#]*))?'), type=VARCHAR()),
        'AUTHORITY': _SQLColumnExpression(
            func.ltrim(func.regexp_substr(self.expression, '//([^/?#]*)'), '//'),
            type=VARCHAR()),
        'USERINFO': _SQLColumnExpression(
            func.rtrim(func.ltrim(
                func.regexp_substr(func.regexp_substr(self.expression, '//([^/?#]*)'), '//[^/?#]+@'),
                '/'), '@'), type=VARCHAR())
    }

    # A string argument selects the extractor directly.
    if isinstance(url_part, str):
        return _extractors[url_part]

    # A column argument is resolved per-row through a CASE expression,
    # one WHEN branch per supported URL part.
    whens = [(url_part == part, expr) for part, expr in _extractors.items()]

    from teradataml.dataframe.sql_functions import case
    return case(whens)
|
|
10757
|
+
def log(self, base):
    """
    DESCRIPTION:
        Returns the logarithm value of the column with respect to 'base'.

    PARAMETERS:
        base:
            Required Argument.
            Specifies base of logarithm.
            Type: int or float or ColumnExpression

    Returns:
        ColumnExpression

    EXAMPLES:
        # Load the data to run the example.
        >>> load_example_data("teradataml", "titanic")

        # Create a DataFrame on 'titanic' table.
        >>> titanic = DataFrame.from_table('titanic')
        >>> df = titanic.select(["passenger", "age", "fare"])

        # Example 1: Compute log values for column 'fare' using base as column 'age'.
        >>> log_df = df.assign(fare_log=df.fare.log(df.age))
        >>> print(log_df)
                    age      fare  fare_log
        passenger
        326        36.0  135.6333  1.370149
        183         9.0   31.3875  1.568529
        652        18.0   23.0000  1.084807
        774         NaN    7.2250       NaN
        >>>
    """
    # Validating Arguments
    arg_type_matrix = [["base", base, False, (int, float, ColumnExpression), True]]
    _Validators._validate_function_arguments(arg_type_matrix)

    # Guard against invalid inputs: when the column value is 0, the base
    # is 0, or ln(base) is 0 (base of 1), the CASE expression yields NULL
    # instead of raising a division-by-zero/log-of-zero error.
    from teradataml.dataframe.sql_functions import case

    if isinstance(base, _SQLColumnExpression):
        condition = (self != 0) & (base != 0) & (base.ln() != 0)
        result = (self.ln() / base.ln()).cast(FLOAT)
    else:
        # A constant base is wrapped as a literal expression.
        condition = (self != 0) & (_SQLColumnExpression(literal(base)).ln() != 0)
        result = (self.ln() / _SQLColumnExpression(literal(base)).ln()).cast(FLOAT)

    return case([(condition, result)])
|
|
10827
|
+
def isnan(self):
    """
    DESCRIPTION:
        Function evaluates a variable or expression to determine if the
        floating-point argument is a NaN (Not-a-Number) value. When a database
        table contains a NaN value, the data is undefined and unrepresentable
        in floating-point arithmetic. For example, division by 0, or the square root
        of a negative number would return a NaN result.

    RETURNS:
        ColumnExpression.

    EXAMPLES:
        # Load the data to run the example.
        >>> load_example_data("teradataml","titanic")

        # Create a DataFrame on 'titanic' table.
        >>> titanic = DataFrame.from_table('titanic')
        >>> df = titanic.select(["passenger", "age", "fare"])

        # Example 1: Find whether 'fare' column contains NaN values or not.
        >>> nan_df = df.assign(nanornot = df.fare.isnan())
        >>> print(nan_df)
                    age      fare  nanornot
        passenger
        326        36.0  135.6333         0
        183         9.0   31.3875         0
        774         NaN    7.2250         0
        >>>
    """
    # Delegate the check to Vantage's TD_ISNAN; result is 0/1 as INTEGER.
    sql_text = "TD_ISNAN({})".format(self.compile())
    return _SQLColumnExpression(literal_column(sql_text), type=INTEGER)
+
|
|
10880
|
+
def isinf(self):
    """
    DESCRIPTION:
        Function evaluates a variable or expression to determine if the
        floating-point argument is an infinite number. This function determines
        if a database table contains positive or negative infinite values.

    RETURNS:
        ColumnExpression.

    EXAMPLES:
        # Load the data to run the example.
        >>> load_example_data("teradataml","titanic")

        # Create a DataFrame on 'titanic' table.
        >>> titanic = DataFrame.from_table('titanic')
        >>> df = titanic.select(["passenger", "age", "fare"])

        # Example 1: Find whether 'fare' column contains infinity values or not.
        >>> inf_df = df.assign(infornot = df.fare.isinf())
        >>> print(inf_df)
                    age      fare  infornot
        passenger
        326        36.0  135.6333         0
        183         9.0   31.3875         0
        774         NaN    7.2250         0
        >>>
    """
    # Delegate the check to Vantage's TD_ISINF; result is 0/1 as INTEGER.
    sql_text = "TD_ISINF({})".format(self.compile())
    return _SQLColumnExpression(literal_column(sql_text), type=INTEGER)
|
|
10931
|
+
def isfinite(self):
    """
    DESCRIPTION:
        Function evaluates a variable or expression to determine if
        it is a finite floating value. A finite floating value is not
        a NaN (Not a Number) value and is not an infinity value.

    RETURNS:
        ColumnExpression.

    EXAMPLES:
        # Load the data to run the example.
        >>> load_example_data("teradataml","titanic")

        # Create a DataFrame on 'titanic' table.
        >>> titanic = DataFrame.from_table('titanic')
        >>> df = titanic.select(["passenger", "age", "fare"])

        # Example 1: Find whether 'fare' column contains finite values or not.
        >>> finite_df = df.assign(finiteornot = df.fare.isfinite())
        >>> print(finite_df)
                    age    fare  finiteornot
        passenger
        530        23.0  11.500            1
        591        35.0   7.125            1
        122         NaN   8.050            1
        >>>
    """
    # Delegate the check to Vantage's TD_ISFINITE; result is 0/1 as INTEGER.
    sql_text = "TD_ISFINITE({})".format(self.compile())
    return _SQLColumnExpression(literal_column(sql_text), type=INTEGER)