teradataml 20.0.0.1__py3-none-any.whl → 20.0.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +112 -0
- teradataml/__init__.py +6 -3
- teradataml/_version.py +1 -1
- teradataml/analytics/__init__.py +3 -2
- teradataml/analytics/analytic_function_executor.py +224 -16
- teradataml/analytics/analytic_query_generator.py +92 -0
- teradataml/analytics/byom/__init__.py +3 -2
- teradataml/analytics/json_parser/metadata.py +1 -0
- teradataml/analytics/json_parser/utils.py +6 -4
- teradataml/analytics/meta_class.py +40 -1
- teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
- teradataml/analytics/sqle/__init__.py +10 -2
- teradataml/analytics/table_operator/__init__.py +3 -2
- teradataml/analytics/uaf/__init__.py +21 -2
- teradataml/analytics/utils.py +62 -1
- teradataml/analytics/valib.py +1 -1
- teradataml/automl/__init__.py +1502 -323
- teradataml/automl/custom_json_utils.py +139 -61
- teradataml/automl/data_preparation.py +245 -306
- teradataml/automl/data_transformation.py +32 -12
- teradataml/automl/feature_engineering.py +313 -82
- teradataml/automl/model_evaluation.py +44 -35
- teradataml/automl/model_training.py +109 -146
- teradataml/catalog/byom.py +8 -8
- teradataml/clients/pkce_client.py +1 -1
- teradataml/common/constants.py +37 -0
- teradataml/common/deprecations.py +13 -7
- teradataml/common/garbagecollector.py +151 -120
- teradataml/common/messagecodes.py +4 -1
- teradataml/common/messages.py +2 -1
- teradataml/common/sqlbundle.py +1 -1
- teradataml/common/utils.py +97 -11
- teradataml/common/wrapper_utils.py +1 -1
- teradataml/context/context.py +72 -2
- teradataml/data/complaints_test_tokenized.csv +353 -0
- teradataml/data/complaints_tokens_model.csv +348 -0
- teradataml/data/covid_confirm_sd.csv +83 -0
- teradataml/data/dataframe_example.json +10 -0
- teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
- teradataml/data/docs/sqle/docs_17_20/Shap.py +197 -0
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
- teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
- teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
- teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
- teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
- teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
- teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
- teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
- teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
- teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
- teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
- teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
- teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
- teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
- teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
- teradataml/data/dwt2d_dataTable.csv +65 -0
- teradataml/data/dwt_dataTable.csv +8 -0
- teradataml/data/dwt_filterTable.csv +3 -0
- teradataml/data/finance_data4.csv +13 -0
- teradataml/data/grocery_transaction.csv +19 -0
- teradataml/data/idwt2d_dataTable.csv +5 -0
- teradataml/data/idwt_dataTable.csv +8 -0
- teradataml/data/idwt_filterTable.csv +3 -0
- teradataml/data/interval_data.csv +5 -0
- teradataml/data/jsons/paired_functions.json +14 -0
- teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
- teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
- teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
- teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
- teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
- teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
- teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
- teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
- teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
- teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
- teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
- teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
- teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
- teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
- teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
- teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
- teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
- teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
- teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
- teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
- teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +208 -0
- teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
- teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
- teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
- teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +400 -0
- teradataml/data/load_example_data.py +8 -2
- teradataml/data/naivebayestextclassifier_example.json +1 -1
- teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
- teradataml/data/peppers.png +0 -0
- teradataml/data/real_values.csv +14 -0
- teradataml/data/sax_example.json +8 -0
- teradataml/data/scripts/deploy_script.py +1 -1
- teradataml/data/scripts/sklearn/sklearn_fit.py +17 -10
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +2 -2
- teradataml/data/scripts/sklearn/sklearn_function.template +30 -7
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +1 -1
- teradataml/data/scripts/sklearn/sklearn_score.py +12 -3
- teradataml/data/scripts/sklearn/sklearn_transform.py +55 -4
- teradataml/data/star_pivot.csv +8 -0
- teradataml/data/templates/open_source_ml.json +2 -1
- teradataml/data/teradataml_example.json +20 -1
- teradataml/data/timestamp_data.csv +4 -0
- teradataml/data/titanic_dataset_unpivoted.csv +19 -0
- teradataml/data/uaf_example.json +55 -1
- teradataml/data/unpivot_example.json +15 -0
- teradataml/data/url_data.csv +9 -0
- teradataml/data/windowdfft.csv +16 -0
- teradataml/dataframe/copy_to.py +1 -1
- teradataml/dataframe/data_transfer.py +5 -3
- teradataml/dataframe/dataframe.py +474 -41
- teradataml/dataframe/fastload.py +3 -3
- teradataml/dataframe/functions.py +339 -0
- teradataml/dataframe/row.py +160 -0
- teradataml/dataframe/setop.py +2 -2
- teradataml/dataframe/sql.py +658 -20
- teradataml/dataframe/window.py +1 -1
- teradataml/dbutils/dbutils.py +322 -16
- teradataml/geospatial/geodataframe.py +1 -1
- teradataml/geospatial/geodataframecolumn.py +1 -1
- teradataml/hyperparameter_tuner/optimizer.py +13 -13
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/sklearn/_sklearn_wrapper.py +154 -69
- teradataml/options/__init__.py +3 -1
- teradataml/options/configure.py +14 -2
- teradataml/options/display.py +2 -2
- teradataml/plot/axis.py +4 -4
- teradataml/scriptmgmt/UserEnv.py +10 -6
- teradataml/scriptmgmt/lls_utils.py +3 -2
- teradataml/table_operators/Script.py +2 -2
- teradataml/table_operators/TableOperator.py +106 -20
- teradataml/table_operators/table_operator_util.py +88 -41
- teradataml/table_operators/templates/dataframe_udf.template +63 -0
- teradataml/telemetry_utils/__init__.py +0 -0
- teradataml/telemetry_utils/queryband.py +52 -0
- teradataml/utils/validators.py +1 -1
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/METADATA +115 -2
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/RECORD +200 -140
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.2.dist-info}/zip-safe +0 -0
|
@@ -0,0 +1,339 @@
|
|
|
1
|
+
from teradatasqlalchemy.types import VARCHAR
|
|
2
|
+
from teradataml.utils.validators import _Validators
|
|
3
|
+
from teradataml.dataframe.sql import _SQLColumnExpression
|
|
4
|
+
from teradatasqlalchemy import (BYTEINT, SMALLINT, INTEGER, BIGINT, DECIMAL, FLOAT,
|
|
5
|
+
NUMBER)
|
|
6
|
+
from teradatasqlalchemy import (TIMESTAMP, DATE, TIME)
|
|
7
|
+
from teradatasqlalchemy import (CHAR, VARCHAR, CLOB)
|
|
8
|
+
from teradatasqlalchemy import (BYTE, VARBYTE, BLOB)
|
|
9
|
+
from teradatasqlalchemy import (PERIOD_DATE, PERIOD_TIME, PERIOD_TIMESTAMP)
|
|
10
|
+
from teradatasqlalchemy import (INTERVAL_YEAR, INTERVAL_YEAR_TO_MONTH, INTERVAL_MONTH,
|
|
11
|
+
INTERVAL_DAY,INTERVAL_DAY_TO_HOUR, INTERVAL_DAY_TO_MINUTE,
|
|
12
|
+
INTERVAL_DAY_TO_SECOND, INTERVAL_HOUR,
|
|
13
|
+
INTERVAL_HOUR_TO_MINUTE, INTERVAL_HOUR_TO_SECOND,
|
|
14
|
+
INTERVAL_MINUTE, INTERVAL_MINUTE_TO_SECOND,
|
|
15
|
+
INTERVAL_SECOND)
|
|
16
|
+
|
|
17
|
+
def udf(user_function=None, returns=VARCHAR(1024), env_name=None, delimiter=',', quotechar=None):
    """
    DESCRIPTION:
        Creates a user defined function (UDF).

    PARAMETERS:
        user_function:
            Required Argument.
            Specifies the user defined function to create a column for
            teradataml DataFrame.
            Types: function
            Note:
                1. Lambda Function are not supported.

        returns:
            Optional Argument.
            Specifies the output column type.
            Types: teradata type
            Default: VARCHAR(1024)

        env_name:
            Optional Argument.
            Specifies the name of the remote user environment or an object of
            class UserEnv for VantageCloud Lake.
            Types: str or object of class UserEnv.
            Note:
                * One can set up a user environment with required packages using teradataml
                  Open Analytics APIs. If no ``env_name`` is provided, udf uses the default
                  ``openml_env`` user environment. This default environment has latest Python
                  and scikit-learn versions that are supported by Open Analytics Framework
                  at the time of creating environment.

        delimiter:
            Optional Argument.
            Specifies a delimiter to use when reading columns from a row and
            writing result columns.
            Default value: ','
            Types: str with one character
            Notes:
                * This argument cannot be same as "quotechar" argument.
                * This argument cannot be a newline character.
                * Use a different delimiter if categorical columns in the data contain
                  a character same as the delimiter.

        quotechar:
            Optional Argument.
            Specifies a character that forces input of the user function
            to be quoted using this specified character.
            Using this argument enables the Advanced SQL Engine to
            distinguish between NULL fields and empty strings.
            A string with length zero is quoted, while NULL fields are not.
            Default value: None
            Types: str with one character
            Notes:
                * This argument cannot be same as "delimiter" argument.
                * This argument cannot be a newline character.

    RETURNS:
        ColumnExpression

    RAISES:
        TeradataMLException

    NOTES:
        1. While working on date and time data types one must format these to supported formats.
           (See Requisite Input and Output Structures in Open Analytics Framework for more details.)
        2. Required packages to run the user defined function must be installed in remote user
           environment using install_lib function of UserEnv class. Import statements of these
           packages should be inside the user defined function itself.
        3. One can't call a regular function defined outside the udf from the user defined function.
           The function definition and call must be inside the udf. Look at Example 9 to understand more.

    EXAMPLES:
        # Load the data to run the example.
        >>> load_example_data("dataframe", "sales")

        # Create a DataFrame on 'sales' table.
        >>> df = DataFrame("sales")
        >>> df
                      Feb    Jan    Mar    Apr    datetime
        accounts
        Yellow Inc   90.0    NaN    NaN    NaN  04/01/2017
        Jones LLC   200.0  150.0  140.0  180.0  04/01/2017
        Red Inc     200.0  150.0  140.0    NaN  04/01/2017
        Alpha Co    210.0  200.0  215.0  250.0  04/01/2017
        Blue Inc     90.0   50.0   95.0  101.0  04/01/2017
        Orange Inc  210.0    NaN    NaN  250.0  04/01/2017

        # Example 1: Create the user defined function to get the values in 'accounts'
        #            to upper case without passing returns argument.
        >>> from teradataml.dataframe.functions import udf
        >>> @udf
        ... def to_upper(s):
        ...     if s is not None:
        ...         return s.upper()
        >>>
        # Assign the Column Expression returned by user defined function
        # to the DataFrame.
        >>> res = df.assign(upper_stats = to_upper('accounts'))
        >>> res
                      Feb    Jan    Mar    Apr  datetime upper_stats
        accounts
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04    ALPHA CO
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04    BLUE INC
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04  YELLOW INC
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04   JONES LLC
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04  ORANGE INC
        Red Inc     200.0  150.0  140.0    NaN  17/01/04     RED INC
        >>>

        # Example 2: Create a user defined function to add length of string values in column
        #            'accounts' with column 'Feb' and store the result in Integer type column.
        >>> from teradatasqlalchemy.types import INTEGER
        >>> @udf(returns=INTEGER())
        ... def sum(x, y):
        ...     return len(x)+y
        >>>
        # Assign the Column Expression returned by user defined function
        # to the DataFrame.
        >>> res = df.assign(len_sum = sum('accounts', 'Feb'))
        >>> res
                      Feb    Jan    Mar    Apr  datetime  len_sum
        accounts
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04      218
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04       98
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04      100
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04      209
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04      220
        Red Inc     200.0  150.0  140.0    NaN  17/01/04      207
        >>>

        # Example 3: Create a function to get the values in 'accounts' to upper case
        #            and pass it to udf as parameter to create a user defined function.
        >>> from teradataml.dataframe.functions import udf
        >>> def to_upper(s):
        ...     if s is not None:
        ...         return s.upper()
        >>> upper_case = udf(to_upper)
        >>>
        # Assign the Column Expression returned by user defined function
        # to the DataFrame.
        >>> res = df.assign(upper_stats = upper_case('accounts'))
        >>> res
                      Feb    Jan    Mar    Apr  datetime upper_stats
        accounts
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04    ALPHA CO
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04    BLUE INC
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04  YELLOW INC
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04   JONES LLC
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04  ORANGE INC
        Red Inc     200.0  150.0  140.0    NaN  17/01/04     RED INC
        >>>

        # Example 4: Create a user defined function to add 4 to the 'datetime' column
        #            and store the result in DATE type column.
        >>> from teradatasqlalchemy.types import DATE
        >>> import datetime
        >>> @udf(returns=DATE())
        ... def add_date(x, y):
        ...     return (datetime.datetime.strptime(x, "%y/%m/%d")+datetime.timedelta(y)).strftime("%y/%m/%d")
        >>>
        # Assign the Column Expression returned by user defined function
        # to the DataFrame.
        >>> res = df.assign(new_date = add_date('datetime', 4))
        >>> res
                      Feb    Jan    Mar    Apr  datetime  new_date
        accounts
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04  17/01/08
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04  17/01/08
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04  17/01/08
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04  17/01/08
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04  17/01/08
        Red Inc     200.0  150.0  140.0    NaN  17/01/04  17/01/08

        # Example 5: Create a user defined function to add 4 to the 'datetime' column
        #            without passing returns argument.
        >>> from teradatasqlalchemy.types import DATE
        >>> import datetime
        >>> @udf
        ... def add_date(x, y):
        ...     return (datetime.datetime.strptime(x, "%y/%m/%d")+datetime.timedelta(y))
        >>>
        # Assign the Column Expression returned by user defined function
        # to the DataFrame.
        >>> res = df.assign(new_date = add_date('datetime', 4))
        >>> res
                      Feb    Jan    Mar    Apr  datetime             new_date
        accounts
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04  2017-01-08 00:00:00
        Red Inc     200.0  150.0  140.0    NaN  17/01/04  2017-01-08 00:00:00
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04  2017-01-08 00:00:00
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04  2017-01-08 00:00:00
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04  2017-01-08 00:00:00
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04  2017-01-08 00:00:00

        # Example 6: Create two user defined functions, 'to_upper' and 'sum',
        #            'to_upper' to get the values in 'accounts' to upper case and
        #            'sum' to add length of string values in column 'accounts'
        #            with column 'Feb' and store the result in Integer type column.
        >>> @udf
        ... def to_upper(s):
        ...     if s is not None:
        ...         return s.upper()
        >>>
        >>> from teradatasqlalchemy.types import INTEGER
        >>> @udf(returns=INTEGER())
        ... def sum(x, y):
        ...     return len(x)+y
        >>>
        # Assign the both Column Expression returned by user defined functions
        # to the DataFrame.
        >>> res = df.assign(upper_stats = to_upper('accounts'), len_sum = sum('accounts', 'Feb'))
        >>> res
                      Feb    Jan    Mar    Apr  datetime upper_stats  len_sum
        accounts
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04    BLUE INC       98
        Red Inc     200.0  150.0  140.0    NaN  17/01/04     RED INC      207
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04  YELLOW INC      100
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04   JONES LLC      209
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04  ORANGE INC      220
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04    ALPHA CO      218
        >>>

        # Example 7: Convert the values in 'accounts' column to upper case using a user
        #            defined function on Vantage Cloud Lake.
        # Create a Python 3.10.5 environment with given name and description in Vantage.
        >>> env = create_env('test_udf', 'python_3.10.5', 'Test environment for UDF')
        User environment 'test_udf' created.
        >>>
        # Create a user defined function 'to_upper' to get the values in upper case
        # and pass the user env to run it on.
        >>> from teradataml.dataframe.functions import udf
        >>> @udf(env_name = env)
        ... def to_upper(s):
        ...     if s is not None:
        ...         return s.upper()
        >>>
        # Assign the Column Expression returned by user defined function
        # to the DataFrame.
        >>> df.assign(upper_stats = to_upper('accounts'))
                      Feb    Jan    Mar    Apr  datetime upper_stats
        accounts
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04    ALPHA CO
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04    BLUE INC
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04  YELLOW INC
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04   JONES LLC
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04  ORANGE INC
        Red Inc     200.0  150.0  140.0    NaN  17/01/04     RED INC

        # Example 8: Create a user defined function to add 4 to the 'datetime' column
        #            and store the result in DATE type column on Vantage Cloud Lake.
        >>> from teradatasqlalchemy.types import DATE
        >>> import datetime
        >>> @udf(returns=DATE())
        ... def add_date(x, y):
        ...     return (datetime.datetime.strptime(x, "%Y-%m-%d")+datetime.timedelta(y)).strftime("%Y-%m-%d")
        >>>
        # Assign the Column Expression returned by user defined function
        # to the DataFrame.
        >>> res = df.assign(new_date = add_date('datetime', 4))
        >>> res
                      Feb    Jan    Mar    Apr  datetime  new_date
        accounts
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04  17/01/08
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04  17/01/08
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04  17/01/08
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04  17/01/08
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04  17/01/08
        Red Inc     200.0  150.0  140.0    NaN  17/01/04  17/01/08
        >>>

        # Example 9: Define a function 'inner_add_date' inside the udf to create a
        #            date object by passing year, month, and day and add 1 to that date.
        #            Call this function inside the user defined function.
        >>> @udf
        ... def add_date(y,m,d):
        ...     import datetime
        ...     def inner_add_date(y,m,d):
        ...         return datetime.date(y,m,d) + datetime.timedelta(1)
        ...     return inner_add_date(y,m,d)

        # Assign the Column Expression returned by user defined function
        # to the DataFrame.
        >>> res = df.assign(new_date = add_date(2021, 10, 5))
        >>> res
                      Feb    Jan    Mar    Apr  datetime    new_date
        accounts
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04  2021-10-06
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04  2021-10-06
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04  2021-10-06
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04  2021-10-06
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04  2021-10-06
        Red Inc     200.0  150.0  140.0    NaN  17/01/04  2021-10-06
        >>>
    """

    # Teradata types a UDF result column is allowed to have.
    allowed_datatypes = (BYTEINT, SMALLINT, INTEGER, BIGINT, DECIMAL, FLOAT, NUMBER,
                         TIMESTAMP, DATE, TIME, CHAR, VARCHAR, CLOB, BYTE, VARBYTE,
                         BLOB, PERIOD_DATE, PERIOD_TIME, PERIOD_TIMESTAMP,
                         INTERVAL_YEAR, INTERVAL_YEAR_TO_MONTH, INTERVAL_MONTH,
                         INTERVAL_DAY, INTERVAL_DAY_TO_HOUR, INTERVAL_DAY_TO_MINUTE,
                         INTERVAL_DAY_TO_SECOND, INTERVAL_HOUR,
                         INTERVAL_HOUR_TO_MINUTE, INTERVAL_HOUR_TO_SECOND,
                         INTERVAL_MINUTE, INTERVAL_MINUTE_TO_SECOND, INTERVAL_SECOND
                         )

    # Validate datatypes in returns.
    _Validators._validate_function_arguments([["returns", returns, False, allowed_datatypes]])

    def _wrap(func):
        # Wrap "func" so that calling it with column names/literals yields a
        # lazy _SQLColumnExpression carrying the UDF and its execution options.
        # Both decorator notations funnel through this single factory.
        def func_(*args):
            return _SQLColumnExpression(expression=None, udf=func, udf_type=returns,
                                        udf_args=args, env_name=env_name,
                                        delimiter=delimiter, quotechar=quotechar)
        return func_

    # Notation: @udf(returns=INTEGER()) - called with keyword arguments only,
    # so "udf" acts as a decorator factory and returns the decorator itself.
    if user_function is None:
        return _wrap

    # Notation: @udf - applied directly to the user function.
    return _wrap(user_function)
|
|
@@ -0,0 +1,160 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Unpublished work.
|
|
3
|
+
Copyright (c) 2021 by Teradata Corporation. All rights reserved.
|
|
4
|
+
TERADATA CORPORATION CONFIDENTIAL AND TRADE SECRET
|
|
5
|
+
|
|
6
|
+
Primary Owner: pradeep.garre@teradata.com
|
|
7
|
+
Secondary Owner: PankajVinod.Purandare@teradata.com
|
|
8
|
+
|
|
9
|
+
This file implements the Row for teradataml DataFrame.
|
|
10
|
+
"""
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
class _Row:
    """ Class for representing a row in teradataml DataFrame. """

    def __init__(self, columns, values):
        """
        DESCRIPTION:
            Constructor for creating Row object.

        PARAMETERS:
            columns:
                Required Argument.
                Specifies the name(s) of the columns for the corresponding
                teradataml DataFrame.
                Types: list

            values:
                Required Argument.
                Specifies the corresponding values for the columns.
                Types: list

        RAISES:
            None

        EXAMPLES:

            # Example 1: Create a Row for columns 'a', 'b', 'c' and corresponding values 'p', 'q', 'r'.
            >>> row = _Row(columns=['a', 'b', 'c'], values=['p', 'q', 'r'])
        """
        # Name based lookup uses the mapping; positional lookup uses the
        # original values list so column order is preserved.
        self.__data = dict(zip(columns, values))
        self.__values = values

    def _asdict(self):
        """
        DESCRIPTION:
            Returns the column name to value mapping, similar to
            namedtuple._asdict.

        PARAMETERS:
            None

        RETURNS:
            dict

        EXAMPLES:
            >>> row = _Row(columns=['a', 'b', 'c'], values=['p', 'q', 'r'])
            >>> row._asdict()
        """
        return self.__data

    def __getattr__(self, item):
        """
        DESCRIPTION:
            Retrieve the corresponding value for column
            using dot(.) notation.

        PARAMETERS:
            item:
                Required Argument.
                Specifies name of the column.
                Types: str

        RETURNS:
            str OR int OR float OR datetime

        RAISES:
            AttributeError, when "item" is not a column of the row.

        EXAMPLES:
            >>> row = _Row(columns=['a', 'b', 'c'], values=['p', 'q', 'r'])
            >>> row.a
        """
        # Check if item is a valid column or not. If yes, proceed. Otherwise raise error.
        if item in self.__data:
            return self.__data[item]

        raise AttributeError("'Row' object has no attribute '{}'".format(item))

    def __getitem__(self, item):
        """
        DESCRIPTION:
            Retrieve the corresponding value for column
            using square bracket([]) notation.

        PARAMETERS:
            item:
                Required Argument.
                Specifies the name or the index of the column.
                Types: str OR int

        RETURNS:
            str OR int OR float OR datetime

        RAISES:
            IndexError, when an integer "item" is out of range.
            AttributeError, when a string "item" is not a column of the row.

        EXAMPLES:
            >>> row = _Row(columns=['a', 'b', 'c'], values=['p', 'q', 'r'])
            >>> row['a']
            'p'
            >>> row[1]
            'q'
        """
        # User's can retrieve the value of a column either by using name of the
        # column or by index of column position.
        if isinstance(item, int):

            # Check if sourced index is valid or not.
            if item >= len(self.__values):
                raise IndexError("tuple index out of range")

            return self.__values[item]

        # If it is a string, retrieve it from here. Otherwise, raise error.
        if item in self.__data:
            return self.__data[item]

        raise AttributeError("'Row' object has no attribute '{}'".format(item))

    def __dir__(self):
        """
        DESCRIPTION:
            Provide the suggestions for column names.

        PARAMETERS:
            None

        RETURNS:
            tuple

        EXAMPLES:
            >>> row = _Row(columns=['a', 'b', 'c'], values=['p', 'q', 'r'])
            >>> dir(row)
        """
        return tuple(col for col in self.__data)

    def __str__(self):
        """
        DESCRIPTION:
            Returns the string representation of _Row object.

        PARAMETERS:
            None

        RETURNS:
            str

        EXAMPLES:
            >>> row = _Row(columns=['a', 'b', 'c'], values=['p', 'q', 'r'])
            >>> print(row)
        """
        return self.__repr__()

    def __repr__(self):
        """
        DESCRIPTION:
            Returns the string representation of _Row object.

        PARAMETERS:
            None

        RETURNS:
            str

        EXAMPLES:
            >>> row = _Row(columns=['a', 'b', 'c'], values=['p', 'q', 'r'])
            >>> print(row)
        """
        columns_values = ", ".join(("{}={}".format(col, repr(val)) for col, val in self.__data.items()))
        return "Row({})".format(columns_values)
|
teradataml/dataframe/setop.py
CHANGED
|
@@ -24,7 +24,7 @@ from teradatasqlalchemy.dialect import dialect as td_dialect, TeradataTypeCompil
|
|
|
24
24
|
from teradatasqlalchemy import (GEOMETRY, MBR, MBB)
|
|
25
25
|
from teradatasql import OperationalError
|
|
26
26
|
|
|
27
|
-
from
|
|
27
|
+
from teradataml.telemetry_utils.queryband import collect_queryband
|
|
28
28
|
|
|
29
29
|
module = importlib.import_module("teradataml")
|
|
30
30
|
|
|
@@ -963,7 +963,7 @@ def td_minus(df_list, allow_duplicates=True):
|
|
|
963
963
|
awu_matrix = []
|
|
964
964
|
awu_matrix.append(["df_list", df_list, False, (list)])
|
|
965
965
|
awu_matrix.append(["allow_duplicates", allow_duplicates, False, (bool)])
|
|
966
|
-
setop_type = 'td_except' if (inspect.stack()[
|
|
966
|
+
setop_type = 'td_except' if (inspect.stack()[3][3] and inspect.stack()[3][3] == 'td_except') else 'td_minus'
|
|
967
967
|
operation = 'minus'
|
|
968
968
|
|
|
969
969
|
# Validate Set operator arguments
|