teradataml 20.0.0.1__py3-none-any.whl → 20.0.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/LICENSE.pdf +0 -0
- teradataml/README.md +306 -0
- teradataml/__init__.py +10 -3
- teradataml/_version.py +1 -1
- teradataml/analytics/__init__.py +3 -2
- teradataml/analytics/analytic_function_executor.py +299 -16
- teradataml/analytics/analytic_query_generator.py +92 -0
- teradataml/analytics/byom/__init__.py +3 -2
- teradataml/analytics/json_parser/metadata.py +13 -3
- teradataml/analytics/json_parser/utils.py +13 -6
- teradataml/analytics/meta_class.py +40 -1
- teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
- teradataml/analytics/sqle/__init__.py +11 -2
- teradataml/analytics/table_operator/__init__.py +4 -3
- teradataml/analytics/uaf/__init__.py +21 -2
- teradataml/analytics/utils.py +66 -1
- teradataml/analytics/valib.py +1 -1
- teradataml/automl/__init__.py +1502 -323
- teradataml/automl/custom_json_utils.py +139 -61
- teradataml/automl/data_preparation.py +247 -307
- teradataml/automl/data_transformation.py +32 -12
- teradataml/automl/feature_engineering.py +325 -86
- teradataml/automl/model_evaluation.py +44 -35
- teradataml/automl/model_training.py +122 -153
- teradataml/catalog/byom.py +8 -8
- teradataml/clients/pkce_client.py +1 -1
- teradataml/common/__init__.py +2 -1
- teradataml/common/constants.py +72 -0
- teradataml/common/deprecations.py +13 -7
- teradataml/common/garbagecollector.py +152 -120
- teradataml/common/messagecodes.py +11 -2
- teradataml/common/messages.py +4 -1
- teradataml/common/sqlbundle.py +26 -4
- teradataml/common/utils.py +225 -14
- teradataml/common/wrapper_utils.py +1 -1
- teradataml/context/context.py +82 -2
- teradataml/data/SQL_Fundamentals.pdf +0 -0
- teradataml/data/complaints_test_tokenized.csv +353 -0
- teradataml/data/complaints_tokens_model.csv +348 -0
- teradataml/data/covid_confirm_sd.csv +83 -0
- teradataml/data/dataframe_example.json +27 -1
- teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
- teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
- teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
- teradataml/data/docs/sqle/docs_17_20/Shap.py +203 -0
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
- teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
- teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
- teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
- teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
- teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
- teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
- teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
- teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
- teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
- teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
- teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
- teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
- teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
- teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
- teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
- teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
- teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
- teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
- teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
- teradataml/data/dwt2d_dataTable.csv +65 -0
- teradataml/data/dwt_dataTable.csv +8 -0
- teradataml/data/dwt_filterTable.csv +3 -0
- teradataml/data/finance_data4.csv +13 -0
- teradataml/data/grocery_transaction.csv +19 -0
- teradataml/data/idwt2d_dataTable.csv +5 -0
- teradataml/data/idwt_dataTable.csv +8 -0
- teradataml/data/idwt_filterTable.csv +3 -0
- teradataml/data/interval_data.csv +5 -0
- teradataml/data/jsons/paired_functions.json +14 -0
- teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
- teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
- teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
- teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
- teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
- teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
- teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
- teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
- teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
- teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
- teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
- teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
- teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
- teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
- teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
- teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
- teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
- teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
- teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
- teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
- teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
- teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
- teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
- teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
- teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
- teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
- teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
- teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
- teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
- teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
- teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
- teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
- teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
- teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
- teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
- teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
- teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
- teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
- teradataml/data/load_example_data.py +8 -2
- teradataml/data/medical_readings.csv +101 -0
- teradataml/data/naivebayestextclassifier_example.json +1 -1
- teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
- teradataml/data/patient_profile.csv +101 -0
- teradataml/data/peppers.png +0 -0
- teradataml/data/real_values.csv +14 -0
- teradataml/data/sax_example.json +8 -0
- teradataml/data/scripts/deploy_script.py +1 -1
- teradataml/data/scripts/lightgbm/dataset.template +157 -0
- teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
- teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
- teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +194 -160
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
- teradataml/data/scripts/sklearn/sklearn_function.template +34 -16
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
- teradataml/data/scripts/sklearn/sklearn_neighbors.py +1 -1
- teradataml/data/scripts/sklearn/sklearn_score.py +12 -3
- teradataml/data/scripts/sklearn/sklearn_transform.py +162 -24
- teradataml/data/star_pivot.csv +8 -0
- teradataml/data/target_udt_data.csv +8 -0
- teradataml/data/templates/open_source_ml.json +3 -1
- teradataml/data/teradataml_example.json +20 -1
- teradataml/data/timestamp_data.csv +4 -0
- teradataml/data/titanic_dataset_unpivoted.csv +19 -0
- teradataml/data/uaf_example.json +55 -1
- teradataml/data/unpivot_example.json +15 -0
- teradataml/data/url_data.csv +9 -0
- teradataml/data/vectordistance_example.json +4 -0
- teradataml/data/windowdfft.csv +16 -0
- teradataml/dataframe/copy_to.py +1 -1
- teradataml/dataframe/data_transfer.py +5 -3
- teradataml/dataframe/dataframe.py +1002 -201
- teradataml/dataframe/fastload.py +3 -3
- teradataml/dataframe/functions.py +867 -0
- teradataml/dataframe/row.py +160 -0
- teradataml/dataframe/setop.py +2 -2
- teradataml/dataframe/sql.py +840 -33
- teradataml/dataframe/window.py +1 -1
- teradataml/dbutils/dbutils.py +878 -34
- teradataml/dbutils/filemgr.py +48 -1
- teradataml/geospatial/geodataframe.py +1 -1
- teradataml/geospatial/geodataframecolumn.py +1 -1
- teradataml/hyperparameter_tuner/optimizer.py +13 -13
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/__init__.py +1 -1
- teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
- teradataml/opensource/_lightgbm.py +950 -0
- teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
- teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
- teradataml/opensource/sklearn/__init__.py +0 -1
- teradataml/opensource/sklearn/_sklearn_wrapper.py +1019 -574
- teradataml/options/__init__.py +9 -23
- teradataml/options/configure.py +42 -4
- teradataml/options/display.py +2 -2
- teradataml/plot/axis.py +4 -4
- teradataml/scriptmgmt/UserEnv.py +13 -9
- teradataml/scriptmgmt/lls_utils.py +77 -23
- teradataml/store/__init__.py +13 -0
- teradataml/store/feature_store/__init__.py +0 -0
- teradataml/store/feature_store/constants.py +291 -0
- teradataml/store/feature_store/feature_store.py +2223 -0
- teradataml/store/feature_store/models.py +1505 -0
- teradataml/store/vector_store/__init__.py +1586 -0
- teradataml/table_operators/Script.py +2 -2
- teradataml/table_operators/TableOperator.py +106 -20
- teradataml/table_operators/query_generator.py +3 -0
- teradataml/table_operators/table_operator_query_generator.py +3 -1
- teradataml/table_operators/table_operator_util.py +102 -56
- teradataml/table_operators/templates/dataframe_register.template +69 -0
- teradataml/table_operators/templates/dataframe_udf.template +63 -0
- teradataml/telemetry_utils/__init__.py +0 -0
- teradataml/telemetry_utils/queryband.py +52 -0
- teradataml/utils/dtypes.py +4 -2
- teradataml/utils/validators.py +34 -2
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +311 -3
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +240 -157
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
|
@@ -0,0 +1,867 @@
|
|
|
1
|
+
import pandas as pd
|
|
2
|
+
from inspect import getsource
|
|
3
|
+
import re
|
|
4
|
+
from types import FunctionType
|
|
5
|
+
from teradataml.dbutils.filemgr import install_file, list_files, remove_file
|
|
6
|
+
from teradataml.options.configure import configure
|
|
7
|
+
import teradatasqlalchemy as tdsqlalchemy
|
|
8
|
+
from teradataml.utils.validators import _Validators
|
|
9
|
+
from teradataml.dataframe.sql import _SQLColumnExpression
|
|
10
|
+
from teradatasqlalchemy import VARCHAR, CLOB, CHAR
|
|
11
|
+
from teradataml.common.constants import TeradataTypes
|
|
12
|
+
from teradataml.common.utils import UtilFuncs
|
|
13
|
+
from teradataml.utils.dtypes import _Dtypes
|
|
14
|
+
from teradataml.dataframe.sql_interfaces import ColumnExpression
|
|
15
|
+
from teradataml.table_operators.table_operator_util import _TableOperatorUtils
|
|
16
|
+
from teradataml.utils.internal_buffer import _InternalBuffer
|
|
17
|
+
from teradataml.common.exceptions import TeradataMlException
|
|
18
|
+
from teradataml.common.messages import Messages
|
|
19
|
+
from teradataml.common.messagecodes import MessageCodes
|
|
20
|
+
from teradataml.scriptmgmt.lls_utils import get_env
|
|
21
|
+
|
|
22
|
+
def udf(user_function=None, returns=VARCHAR(1024), env_name = None, delimiter=',', quotechar=None):
    """
    DESCRIPTION:
        Creates a user defined function (UDF).

    PARAMETERS:
        user_function:
            Required Argument.
            Specifies the user defined function to create a column for
            teradataml DataFrame.
            Types: function
            Notes:
                1. Lambda functions are not supported.
                2. When udf is used as a decorator with keyword arguments,
                   for example "@udf(returns=INTEGER())", this argument is
                   left unset and the decorated function is used instead.

        returns:
            Optional Argument.
            Specifies the output column type.
            Types: teradatasqlalchemy types object
            Default: VARCHAR(1024)

        env_name:
            Optional Argument.
            Specifies the name of the remote user environment or an object of
            class UserEnv for VantageCloud Lake.
            Types: str or object of class UserEnv.
            Note:
                * One can set up a user environment with required packages using teradataml
                  Open Analytics APIs. If no ``env_name`` is provided, udf uses the default
                  ``openml_env`` user environment. This default environment has latest Python
                  and scikit-learn versions that are supported by Open Analytics Framework
                  at the time of creating environment.

        delimiter:
            Optional Argument.
            Specifies a delimiter to use when reading columns from a row and
            writing result columns.
            Default value: ','
            Types: str with one character
            Notes:
                * This argument cannot be same as "quotechar" argument.
                * This argument cannot be a newline character.
                * Use a different delimiter if categorical columns in the data contain
                  a character same as the delimiter.

        quotechar:
            Optional Argument.
            Specifies a character that forces input of the user function
            to be quoted using this specified character.
            Using this argument enables the Advanced SQL Engine to
            distinguish between NULL fields and empty strings.
            A string with length zero is quoted, while NULL fields are not.
            Default value: None
            Types: str with one character
            Notes:
                * This argument cannot be same as "delimiter" argument.
                * This argument cannot be a newline character.

    RETURNS:
        ColumnExpression

    RAISES:
        TeradataMlException

    NOTES:
        1. While working on date and time data types one must format these to supported formats.
           (See Requisite Input and Output Structures in Open Analytics Framework for more details.)
        2. Required packages to run the user defined function must be installed in remote user
           environment using install_lib function of UserEnv class. Import statements of these
           packages should be inside the user defined function itself.
        3. One can't call a regular function defined outside the udf from the user defined function.
           The function definition and call must be inside the udf. Look at Example 9 to understand more.

    EXAMPLES:
        # Load the data to run the example.
        >>> load_example_data("dataframe", "sales")

        # Create a DataFrame on 'sales' table.
        >>> df = DataFrame("sales")
        >>> df
                      Feb    Jan    Mar    Apr    datetime
        accounts
        Yellow Inc   90.0    NaN    NaN    NaN  04/01/2017
        Jones LLC   200.0  150.0  140.0  180.0  04/01/2017
        Red Inc     200.0  150.0  140.0    NaN  04/01/2017
        Alpha Co    210.0  200.0  215.0  250.0  04/01/2017
        Blue Inc     90.0   50.0   95.0  101.0  04/01/2017
        Orange Inc  210.0    NaN    NaN  250.0  04/01/2017

        # Example 1: Create the user defined function to get the values in 'accounts'
        #            to upper case without passing returns argument.
        >>> from teradataml.dataframe.functions import udf
        >>> @udf
        ... def to_upper(s):
        ...     if s is not None:
        ...         return s.upper()
        >>>
        # Assign the Column Expression returned by user defined function
        # to the DataFrame.
        >>> res = df.assign(upper_stats = to_upper('accounts'))
        >>> res
                      Feb    Jan    Mar    Apr  datetime  upper_stats
        accounts
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04     ALPHA CO
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04     BLUE INC
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04   YELLOW INC
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04    JONES LLC
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04   ORANGE INC
        Red Inc     200.0  150.0  140.0    NaN  17/01/04      RED INC
        >>>

        # Example 2: Create a user defined function to add length of string values in column
        #            'accounts' with column 'Feb' and store the result in Integer type column.
        >>> from teradatasqlalchemy.types import INTEGER
        >>> @udf(returns=INTEGER())
        ... def sum(x, y):
        ...     return len(x)+y
        >>>
        # Assign the Column Expression returned by user defined function
        # to the DataFrame.
        >>> res = df.assign(len_sum = sum('accounts', 'Feb'))
        >>> res
                      Feb    Jan    Mar    Apr  datetime  len_sum
        accounts
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04      218
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04       98
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04      100
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04      209
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04      220
        Red Inc     200.0  150.0  140.0    NaN  17/01/04      207
        >>>

        # Example 3: Create a function to get the values in 'accounts' to upper case
        #            and pass it to udf as parameter to create a user defined function.
        >>> from teradataml.dataframe.functions import udf
        >>> def to_upper(s):
        ...     if s is not None:
        ...         return s.upper()
        >>> upper_case = udf(to_upper)
        >>>
        # Assign the Column Expression returned by user defined function
        # to the DataFrame.
        >>> res = df.assign(upper_stats = upper_case('accounts'))
        >>> res
                      Feb    Jan    Mar    Apr  datetime  upper_stats
        accounts
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04     ALPHA CO
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04     BLUE INC
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04   YELLOW INC
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04    JONES LLC
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04   ORANGE INC
        Red Inc     200.0  150.0  140.0    NaN  17/01/04      RED INC
        >>>

        # Example 4: Create a user defined function to add 4 to the 'datetime' column
        #            and store the result in DATE type column.
        >>> from teradatasqlalchemy.types import DATE
        >>> import datetime
        >>> @udf(returns=DATE())
        ... def add_date(x, y):
        ...     return (datetime.datetime.strptime(x, "%y/%m/%d")+datetime.timedelta(y)).strftime("%y/%m/%d")
        >>>
        # Assign the Column Expression returned by user defined function
        # to the DataFrame.
        >>> res = df.assign(new_date = add_date('datetime', 4))
        >>> res
                      Feb    Jan    Mar    Apr  datetime  new_date
        accounts
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04  17/01/08
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04  17/01/08
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04  17/01/08
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04  17/01/08
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04  17/01/08
        Red Inc     200.0  150.0  140.0    NaN  17/01/04  17/01/08

        # Example 5: Create a user defined function to add 4 to the 'datetime' column
        #            without passing returns argument.
        >>> from teradatasqlalchemy.types import DATE
        >>> import datetime
        >>> @udf
        ... def add_date(x, y):
        ...     return (datetime.datetime.strptime(x, "%y/%m/%d")+datetime.timedelta(y))
        >>>
        # Assign the Column Expression returned by user defined function
        # to the DataFrame.
        >>> res = df.assign(new_date = add_date('datetime', 4))
        >>> res
                      Feb    Jan    Mar    Apr  datetime             new_date
        accounts
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04  2017-01-08 00:00:00
        Red Inc     200.0  150.0  140.0    NaN  17/01/04  2017-01-08 00:00:00
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04  2017-01-08 00:00:00
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04  2017-01-08 00:00:00
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04  2017-01-08 00:00:00
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04  2017-01-08 00:00:00

        # Example 6: Create two user defined functions, 'to_upper' and 'sum':
        #            'to_upper' to get the values in 'accounts' to upper case and
        #            'sum' to add length of string values in column 'accounts'
        #            with column 'Feb' and store the result in Integer type column.
        >>> @udf
        ... def to_upper(s):
        ...     if s is not None:
        ...         return s.upper()
        >>>
        >>> from teradatasqlalchemy.types import INTEGER
        >>> @udf(returns=INTEGER())
        ... def sum(x, y):
        ...     return len(x)+y
        >>>
        # Assign both Column Expressions returned by user defined functions
        # to the DataFrame.
        >>> res = df.assign(upper_stats = to_upper('accounts'), len_sum = sum('accounts', 'Feb'))
        >>> res
                      Feb    Jan    Mar    Apr  datetime  upper_stats  len_sum
        accounts
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04     BLUE INC       98
        Red Inc     200.0  150.0  140.0    NaN  17/01/04      RED INC      207
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04   YELLOW INC      100
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04    JONES LLC      209
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04   ORANGE INC      220
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04     ALPHA CO      218
        >>>

        # Example 7: Convert the values in 'accounts' column to upper case using a user
        #            defined function on Vantage Cloud Lake.
        # Create a Python 3.10.5 environment with given name and description in Vantage.
        >>> env = create_env('test_udf', 'python_3.10.5', 'Test environment for UDF')
        User environment 'test_udf' created.
        >>>
        # Create a user defined function 'to_upper' to get the values in upper case
        # and pass the user env to run it on.
        >>> from teradataml.dataframe.functions import udf
        >>> @udf(env_name = env)
        ... def to_upper(s):
        ...     if s is not None:
        ...         return s.upper()
        >>>
        # Assign the Column Expression returned by user defined function
        # to the DataFrame.
        >>> df.assign(upper_stats = to_upper('accounts'))
                      Feb    Jan    Mar    Apr  datetime  upper_stats
        accounts
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04     ALPHA CO
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04     BLUE INC
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04   YELLOW INC
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04    JONES LLC
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04   ORANGE INC
        Red Inc     200.0  150.0  140.0    NaN  17/01/04      RED INC

        # Example 8: Create a user defined function to add 4 to the 'datetime' column
        #            and store the result in DATE type column on Vantage Cloud Lake.
        >>> from teradatasqlalchemy.types import DATE
        >>> import datetime
        >>> @udf(returns=DATE())
        ... def add_date(x, y):
        ...     return (datetime.datetime.strptime(x, "%Y-%m-%d")+datetime.timedelta(y)).strftime("%Y-%m-%d")
        >>>
        # Assign the Column Expression returned by user defined function
        # to the DataFrame.
        >>> res = df.assign(new_date = add_date('datetime', 4))
        >>> res
                      Feb    Jan    Mar    Apr  datetime  new_date
        accounts
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04  17/01/08
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04  17/01/08
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04  17/01/08
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04  17/01/08
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04  17/01/08
        Red Inc     200.0  150.0  140.0    NaN  17/01/04  17/01/08
        >>>

        # Example 9: Define a function 'inner_add_date' inside the udf to create a
        #            date object by passing year, month, and day and add 1 to that date.
        #            Call this function inside the user defined function.
        >>> @udf
        ... def add_date(y,m,d):
        ...     import datetime
        ...     def inner_add_date(y,m,d):
        ...         return datetime.date(y,m,d) + datetime.timedelta(1)
        ...     return inner_add_date(y,m,d)

        # Assign the Column Expression returned by user defined function
        # to the DataFrame.
        >>> res = df.assign(new_date = add_date(2021, 10, 5))
        >>> res
                      Feb    Jan    Mar    Apr  datetime    new_date
        accounts
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04  2021-10-06
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04  2021-10-06
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04  2021-10-06
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04  2021-10-06
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04  2021-10-06
        Red Inc     200.0  150.0  140.0    NaN  17/01/04  2021-10-06
        >>>
    """

    # Validate datatypes in returns. Only "returns" is checked here; the
    # remaining arguments are forwarded untouched to _SQLColumnExpression.
    _Validators._validate_function_arguments(
        [["returns", returns, False, TeradataTypes.TD_ALL_TYPES.value]])

    # NOTE: the callables returned below intentionally keep their own
    # "udf.<locals>..." qualified names (no functools.wraps): register() in
    # this module recognises a udf-created callable by looking for
    # "udf.<locals>" in its __qualname__.

    # Notation: @udf, or udf(function)
    if user_function is not None:
        def func_(*args):
            # Defer all work: just describe the UDF call as a column expression.
            return _SQLColumnExpression(expression=None,
                                        udf=user_function,
                                        udf_type=returns,
                                        udf_args=args,
                                        env_name=env_name,
                                        delimiter=delimiter,
                                        quotechar=quotechar)
        return func_

    # Notation: @udf(returns=INTEGER())
    def wrapper(f):
        def func_(*args):
            # Same as above, but bound to the function handed to the decorator.
            return _SQLColumnExpression(expression=None,
                                        udf=f,
                                        udf_type=returns,
                                        udf_args=args,
                                        env_name=env_name,
                                        delimiter=delimiter,
                                        quotechar=quotechar)
        return func_
    return wrapper
|
|
336
|
+
|
|
337
|
+
|
|
338
|
+
def register(name, user_function, returns=VARCHAR(1024)):
    """
    DESCRIPTION:
        Registers a user defined function (UDF).

    PARAMETERS:
        name:
            Required Argument.
            Specifies the name of the user defined function to register.
            Types: str

        user_function:
            Required Argument.
            Specifies the user defined function to create a column for
            teradataml DataFrame.
            Types: function, udf

        returns:
            Optional Argument.
            Specifies the output column type used to register the user defined function.
            Note:
                * If 'user_function' is a udf, then return type of the udf is used as return type
                  of the registered user defined function.
            Default Value: VARCHAR(1024)
            Types: teradatasqlalchemy types object

    RETURNS:
        None

    RAISES:
        TeradataMlException, TypeError

    EXAMPLES:
        # Example 1: Register the user defined function to get the values upper case.
        >>> from teradataml.dataframe.functions import udf, register
        >>> @udf
        ... def to_upper(s):
        ...     if s is not None:
        ...         return s.upper()
        >>>
        # Register the created user defined function.
        >>> register("upper_val", to_upper)
        >>>

        # Example 2: Register a user defined function to get factorial of a number and
        #            store the result in Integer type column.
        #            The return type is declared on the udf itself, because for a udf
        #            the "returns" argument of register() is replaced by the udf's type.
        >>> from teradataml.dataframe.functions import udf, register
        >>> from teradatasqlalchemy.types import INTEGER
        >>> @udf(returns=INTEGER())
        ... def factorial(n):
        ...     import math
        ...     return math.factorial(n)
        >>>
        # Register the created user defined function.
        >>> register("fact", factorial)
        >>>

        # Example 3: Register a Python function to get the values upper case.
        >>> from teradataml.dataframe.functions import register
        >>> def to_upper(s):
        ...     return s.upper()
        >>>
        # Register the created Python function.
        >>> register("upper_val", to_upper)
        >>>
    """

    # Validate the arguments.
    arg_matrix = []
    allowed_datatypes = TeradataTypes.TD_ALL_TYPES.value
    arg_matrix.append(["returns", returns, True, allowed_datatypes])
    arg_matrix.append(["name", name, False, str])
    _Validators._validate_function_arguments(arg_matrix)

    # Reject anything that is neither a ColumnExpression nor a named callable
    # up front. Without this check such input fails further down with an
    # AttributeError on "__qualname__" instead of the documented TypeError.
    is_column_expression = isinstance(user_function, ColumnExpression)
    if not is_column_expression and not (callable(user_function)
                                         and hasattr(user_function, "__qualname__")):
        raise TypeError("Invalid type passed to argument 'user_function': expected a "
                        "Python function, a udf or a ColumnExpression returned by a udf, "
                        "got '{}'.".format(type(user_function).__name__))

    function = []
    # Check if the user_function is Python function or
    # a user defined function(udf) or ColumnExpression returned by udf.
    if is_column_expression:
        function.append(user_function._udf)
        returns = user_function._type
    elif "udf.<locals>" not in user_function.__qualname__:
        # Plain Python function: register it as is, with the "returns" passed in.
        function.append(user_function)
    else:
        # Callable created by udf(): invoke it without arguments to obtain the
        # ColumnExpression, which carries the wrapped function and its type.
        user_function = user_function()
        function.append(user_function._udf)
        returns = user_function._type

    # Create a dictionary of user defined function name to return type.
    returns = {name: _create_return_type(returns)}

    exec_mode = 'REMOTE' if UtilFuncs._is_lake() else 'IN-DB'

    tbl_operators = _TableOperatorUtils([],
                                        None,
                                        "register",
                                        function,
                                        exec_mode,
                                        chunk_size=None,
                                        num_rows=1,
                                        delimiter=None,
                                        quotechar=None,
                                        data_partition_column=None,
                                        data_hash_column=None,
                                        style="csv",
                                        returns=returns,
                                        )

    # Install the file on the lake/enterprise environment.
    if exec_mode == 'REMOTE':
        _Validators._check_auth_token("register")
        env_name = UtilFuncs._get_env_name()
        # Keep the environment in a local variable rather than attaching it to
        # tbl_operators as an ad-hoc "__env" attribute; it is only needed here.
        user_env = get_env(env_name)
        user_env.install_file(tbl_operators.script_path, suppress_output=True, replace=True)
    else:
        install_file(file_identifier=tbl_operators.script_base_name,
                     file_path=tbl_operators.script_path,
                     suppress_output=True, replace=True)
|
|
455
|
+
|
|
456
|
+
|
|
457
|
+
def call_udf(udf_name, func_args=(), **kwargs):
    """
    DESCRIPTION:
        Call a registered user defined function (UDF).

    PARAMETERS:
        udf_name:
            Required Argument.
            Specifies the name of the registered user defined function.
            Types: str

        func_args:
            Optional Argument.
            Specifies the arguments to pass to the registered UDF.
            Default Value: ()
            Types: tuple

        delimiter:
            Optional Argument.
            Specifies a delimiter to use when reading columns from a row and
            writing result columns.
            Notes:
                * This argument cannot be same as "quotechar" argument.
                * This argument cannot be a newline character.
                * Use a different delimiter if categorical columns in the data
                  contain a character same as the delimiter.
            Default Value: ','
            Types: one character string

        quotechar:
            Optional Argument.
            Specifies a character that forces input of the user function
            to be quoted using this specified character.
            Using this argument enables the Analytics Database to
            distinguish between NULL fields and empty strings.
            A string with length zero is quoted, while NULL fields are not.
            Notes:
                * This argument cannot be same as "delimiter" argument.
                * This argument cannot be a newline character.
            Default Value: None
            Types: one character string

    RETURNS:
        ColumnExpression

    RAISES:
        TeradataMLException, TypeError

    EXAMPLES:
        # Load the data to run the example.
        >>> load_example_data("dataframe", "sales")

        # Create a DataFrame on 'sales' table.
        >>> import random
        >>> dfsales = DataFrame("sales")
        >>> df = dfsales.assign(id = case([(dfsales.accounts == 'Alpha Co', random.randrange(1, 9)),
        ...                                (dfsales.accounts == 'Blue Inc', random.randrange(1, 9)),
        ...                                (dfsales.accounts == 'Jones LLC', random.randrange(1, 9)),
        ...                                (dfsales.accounts == 'Orange Inc', random.randrange(1, 9)),
        ...                                (dfsales.accounts == 'Yellow Inc', random.randrange(1, 9)),
        ...                                (dfsales.accounts == 'Red Inc', random.randrange(1, 9))]))

        # Example 1: Register and Call the user defined function to get the values upper case.
        >>> from teradataml.dataframe.functions import udf, register, call_udf
        >>> @udf
        ... def to_upper(s):
        ...     if s is not None:
        ...         return s.upper()
        >>>
        # Register the created user defined function with name "upper".
        >>> register("upper", to_upper)
        >>>
        # Call the user defined function registered with name "upper" and assign the
        # ColumnExpression returned to the DataFrame.
        >>> res = df.assign(upper_col = call_udf("upper", ('accounts',)))
        >>> res
                      Feb    Jan    Mar    Apr  datetime  id   upper_col
        accounts
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04   4  YELLOW INC
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04   2    ALPHA CO
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04   5   JONES LLC
        Red Inc     200.0  150.0  140.0    NaN  17/01/04   3     RED INC
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04   1    BLUE INC
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04   4  ORANGE INC
        >>>

        # Example 2: Register and Call user defined function to get factorial of a number
        #            and store the result in Integer type column.
        >>> from teradataml.dataframe.functions import udf, register
        >>> from teradatasqlalchemy.types import INTEGER
        >>> @udf(returns = INTEGER())
        ... def factorial(n):
        ...     import math
        ...     return math.factorial(n)
        >>>
        # Register the created user defined function with name "fact".
        >>> register("fact", factorial)
        >>>
        # Call the user defined function registered with name "fact" and assign the
        # ColumnExpression returned to the DataFrame.
        >>> res = df.assign(fact_col = call_udf("fact", ('id',)))
        >>> res
                      Feb    Jan    Mar    Apr  datetime  id  fact_col
        accounts
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04   5       120
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04   4        24
        Red Inc     200.0  150.0  140.0    NaN  17/01/04   3         6
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04   1         1
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04   2         2
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04   4        24
        >>>

        # Example 3: Register and Call the Python function to get the values upper case.
        >>> from teradataml.dataframe.functions import register, call_udf
        >>> from teradatasqlalchemy.types import VARCHAR
        >>> def to_upper(s):
        ...     return s.upper()
        >>>
        # Register the created Python function with name "upper".
        >>> register("upper", to_upper, returns = VARCHAR(1024))
        >>>
        # Call the Python function registered with name "upper" and assign the
        # ColumnExpression returned to the DataFrame.
        >>> res = df.assign(upper_col = call_udf("upper", ('accounts',)))
        >>> res
                      Feb    Jan    Mar    Apr  datetime  id   upper_col
        accounts
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04   4  YELLOW INC
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04   2    ALPHA CO
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04   5   JONES LLC
        Red Inc     200.0  150.0  140.0    NaN  17/01/04   3     RED INC
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04   1    BLUE INC
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04   4  ORANGE INC
        >>>
    """
    env = None
    delimiter = kwargs.pop('delimiter', ',')
    quotechar = kwargs.pop('quotechar', None)
    # Anything left in kwargs is not a supported keyword; report the first one.
    if kwargs:
        raise TypeError(Messages.get_message(MessageCodes.UNKNOWN_ARGUMENT,
                                             "call_udf", next(iter(kwargs))))

    if UtilFuncs._is_lake():
        _Validators._check_auth_token("call_udf")
        env = get_env(UtilFuncs._get_env_name())
        file_list = env.files
        if file_list is None:
            raise TeradataMlException(Messages.get_message(
                MessageCodes.FUNC_EXECUTION_FAILED, "'call_udf'", "No UDF is registered with the name '{}'.".format(udf_name)),
                MessageCodes.FUNC_EXECUTION_FAILED)
        file_column = 'File'
    else:
        file_list = list_files().to_pandas()
        file_column = 'Files'

    # Get the script name from the environment that starts with
    # tdml_udf_name_<udf_name>_udf_type_.
    script_prefix = 'tdml_udf_name_{}_udf_type_'.format(udf_name)
    script_file = [file for file in file_list[file_column] if file.startswith(script_prefix)]
    if len(script_file) != 1:
        raise TeradataMlException(Messages.get_message(
            MessageCodes.FUNC_EXECUTION_FAILED, "'call_udf'", "Multiple UDFs or no UDF is registered with the name '{}'.".format(udf_name)),
            MessageCodes.FUNC_EXECUTION_FAILED)

    script_name = script_file[0]
    # Get the return type from the script name. The UDF name is escaped so that
    # regex metacharacters in it are matched literally.
    type_match = re.match(re.escape(script_prefix) + r"([A-Z_]+)(\d*)_register", script_name)
    returns = getattr(tdsqlalchemy, type_match.group(1), None) if type_match else None
    if returns is None:
        # The file name does not encode a type this function can rebuild;
        # fail with a library error instead of an AttributeError.
        raise TeradataMlException(Messages.get_message(
            MessageCodes.FUNC_EXECUTION_FAILED, "'call_udf'",
            "Unable to determine the return type of UDF '{}' from script '{}'.".format(udf_name, script_name)),
            MessageCodes.FUNC_EXECUTION_FAILED)

    # If the return type has length, get the length from the script name.
    type_length = type_match.group(2)
    returns = returns(int(type_length)) if type_length else returns()

    return _SQLColumnExpression(expression=None, udf_args=func_args, udf_script=script_name, udf_type=returns,
                                delimiter=delimiter, quotechar=quotechar, env_name=env)
|
|
628
|
+
|
|
629
|
+
|
|
630
|
+
def list_udfs(show_files=False):
    """
    DESCRIPTION:
        List all the UDFs registered using 'register()' function.

    PARAMETERS:
        show_files:
            Optional Argument.
            Specifies whether to show file names or not.
            Default Value: False
            Types: bool

    RETURNS:
        Pandas DataFrame containing files and its details or
        None if DataFrame is empty.

    RAISES:
        TeradataMLException.

    EXAMPLES:
        # Example 1: Register the user defined function to get the values in lower case,
        #            then list all the UDFs registered.
        >>> @udf
        ... def to_lower(s):
        ...     if s is not None:
        ...         return s.lower()

        # Register the created user defined function.
        >>> register("lower", to_lower)

        # List all the UDFs registered
        >>> list_udfs(True)
        id      name  return_type                                             file_name
         0     lower  VARCHAR1024  tdml_udf_name_lower_udf_type_VARCHAR1024_register.py
         1     upper  VARCHAR1024  tdml_udf_name_upper_udf_type_VARCHAR1024_register.py
         2  add_date         DATE      tdml_udf_name_add_date_udf_type_DATE_register.py
         3  sum_cols      INTEGER   tdml_udf_name_sum_cols_udf_type_INTEGER_register.py
        >>>
    """
    if UtilFuncs._is_lake():
        _Validators._check_auth_token("list_udfs")
        env_name = UtilFuncs._get_env_name()
        env_files = get_env(env_name).files
        if env_files is None:
            return None

        # Rename on a copy: the DataFrame belongs to the environment object and
        # other callers read its 'File' column, so it must not be altered here.
        _df = env_files.rename(columns={'File': 'Files'})
        _df = _df[_df['Files'].str.startswith('tdml_udf_') & _df['Files'].str.endswith('_register.py')][['Files']]
        if len(_df) == 0:
            print("No files found in remote user environment {}.".format(env_name))
            return None
        return _create_udf_dataframe(_df, show_files)

    _df = list_files()
    _df = _df[_df['Files'].startswith('tdml_udf_') & _df['Files'].endswith('_register.py')].to_pandas()
    if len(_df) == 0:
        print("No files found in Vantage")
        return None
    return _create_udf_dataframe(_df, show_files)
|
|
690
|
+
|
|
691
|
+
def _create_udf_dataframe(pandas_df, show_files=False):
    """
    DESCRIPTION:
        Internal function to return pandas DataFrame with
        column names "id", "name", "return_type" and, when "show_files"
        is True, "file_name".
        File names that do not follow the
        "tdml_udf_name_<name>_udf_type_<type>_register.py" layout are skipped.

    PARAMETERS:
        pandas_df:
            Required Argument.
            Specifies the pandas DataFrame containing one column 'Files'.
            Types: pandas DataFrame

        show_files:
            Optional Argument.
            Specifies whether to show file names or not.
            Default Value: False
            Types: bool

    RETURNS:
        pandas DataFrame.

    EXAMPLES:
        >>> _create_udf_dataframe(pandas_dataframe)

    """
    prefix = "tdml_udf_name_"
    suffix = "_register.py"
    separator = "_udf_type_"

    _data = {"id": [], "name": [], "return_type": []}
    if show_files:
        _data["file_name"] = []

    for _list in pandas_df.values.tolist():
        file_name = _list[0]
        # Callers filter on the shorter 'tdml_udf_' prefix, so verify the full
        # layout before slicing by fixed offsets.
        if not (file_name.startswith(prefix) and file_name.endswith(suffix)):
            continue
        # Extract udf name and type:
        # "tdml_udf_name_fact_udf_type_VARCHAR1024_register.py" -> ('fact', 'VARCHAR1024').
        # Split on the last separator so a UDF name that itself contains
        # "_udf_type_" is kept intact.
        udf_name, found, return_type = file_name[len(prefix):-len(suffix)].rpartition(separator)
        if not found:
            continue
        # Number only the rows that are kept so that ids stay contiguous.
        _data["id"].append(len(_data["id"]))
        _data["name"].append(udf_name)
        _data["return_type"].append(return_type)
        if show_files:
            _data["file_name"].append(file_name)
    return pd.DataFrame(_data)
|
|
729
|
+
|
|
730
|
+
|
|
731
|
+
def deregister(name, returns=None):
    """
    DESCRIPTION:
        Deregisters a user defined function (UDF) by removing the script
        file(s) installed for it by 'register()'.

    PARAMETERS:
        name:
            Required Argument.
            Specifies the name of the user defined function to deregister.
            Types: str

        returns:
            Optional Argument.
            Specifies the return type the user defined function was
            registered with. When omitted, every registration of "name"
            is removed irrespective of its return type.
            Types: teradatasqlalchemy types object

    RETURNS:
        None

    RAISES:
        TeradataMLException.

    EXAMPLES:
        # Example 1: Register the user defined function to get the values in lower case,
        #            then deregister it.
        >>> @udf
        ... def to_lower(s):
        ...     if s is not None:
        ...         return s.lower()

        # Register the created user defined function.
        >>> register("lower", to_lower)

        # List all the UDFs registered
        >>> list_udfs(True)
        id      name  return_type                                             file_name
         0     lower  VARCHAR1024  tdml_udf_name_lower_udf_type_VARCHAR1024_register.py
         1     upper  VARCHAR1024  tdml_udf_name_upper_udf_type_VARCHAR1024_register.py
         2  add_date         DATE      tdml_udf_name_add_date_udf_type_DATE_register.py
         3  sum_cols      INTEGER   tdml_udf_name_sum_cols_udf_type_INTEGER_register.py
        >>>

        # Deregister the created user defined function.
        >>> deregister("lower")

        # List all the UDFs registered
        >>> list_udfs(True)
        id      name  return_type                                             file_name
         0     upper  VARCHAR1024  tdml_udf_name_upper_udf_type_VARCHAR1024_register.py
         1  add_date         DATE      tdml_udf_name_add_date_udf_type_DATE_register.py
         2  sum_cols      INTEGER   tdml_udf_name_sum_cols_udf_type_INTEGER_register.py
        >>>

        # Example 2: Deregister only the specified user defined function with its return type.
        >>> from teradatasqlalchemy import FLOAT
        >>> @udf(returns=FLOAT())
        ... def sum(x, y):
        ...     return len(x) + y

        # Register the created user defined function.
        >>> register("sum", sum)

        # List all the UDFs registered
        >>> list_udfs(True)
        id  name  return_type                                      file_name
         0   sum        FLOAT    tdml_udf_name_sum_udf_type_FLOAT_register.py
         1   sum      INTEGER  tdml_udf_name_sum_udf_type_INTEGER_register.py
        >>>

        # Deregister the user defined function registered with FLOAT return type.
        >>> deregister("sum", FLOAT())

        # List all the UDFs registered
        >>> list_udfs(True)
        id  name  return_type                                      file_name
         0   sum      INTEGER  tdml_udf_name_sum_udf_type_INTEGER_register.py
        >>>
    """
    def _udf_missing():
        # Both "nothing registered at all" and "no match for this name/type"
        # are reported with the same error.
        return TeradataMlException(Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
                                                        "'deregister'",
                                                        f"UDF '{name}' does not exist."),
                                   MessageCodes.FUNC_EXECUTION_FAILED)

    registered = list_udfs(show_files=True)
    # list_udfs() returns None when no registered UDF script is found.
    if registered is None:
        raise _udf_missing()

    # Narrow down to the scripts of this UDF, and to one return type if given.
    if returns is None:
        wanted = f'tdml_udf_name_{name}_udf_type_'
    else:
        wanted = f'tdml_udf_name_{name}_udf_type_{_create_return_type(returns)}_register.py'
    matches = registered[registered['file_name'].str.startswith(wanted)]

    if matches.empty:
        raise _udf_missing()

    script_files = matches['file_name'].tolist()

    # Remove the file on the lake/enterprise environment.
    if UtilFuncs._is_lake():
        env = get_env(UtilFuncs._get_env_name())
        for script_file in script_files:
            env.remove_file(script_file, suppress_output=True)
    else:
        for script_file in script_files:
            # The file identifier is the script name without its ".py" suffix.
            remove_file(script_file[:-3], force_remove=True, suppress_output=True)
|
|
838
|
+
|
|
839
|
+
|
|
840
|
+
def _create_return_type(returns):
    """
    DESCRIPTION:
        Internal function to build the string form of the type "returns"
        that is embedded in the file name of a registered UDF script.
        For VARCHAR, CLOB and CHAR the length, when set, is appended to
        the type name. Spaces are replaced with underscores.

    PARAMETERS:
        returns:
            Required Argument.
            Specifies the teradatasqlalchemy types object.
            Types: teradatasqlalchemy types object

    RETURNS:
        string

    EXAMPLES:
        >>> _create_return_type(VARCHAR(1024))
        'VARCHAR1024'
    """
    type_name = str(returns)
    # Only the character types carry a length in the file name; a missing
    # length contributes nothing.
    if isinstance(returns, (VARCHAR, CLOB, CHAR)) and returns.length:
        type_name += str(returns.length)
    # Replace the space with underscore in the return type.
    return type_name.replace(" ", "_")
|