teradataml 20.0.0.1__py3-none-any.whl → 20.0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic. Click here for more details.

Files changed (240) hide show
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +306 -0
  4. teradataml/__init__.py +10 -3
  5. teradataml/_version.py +1 -1
  6. teradataml/analytics/__init__.py +3 -2
  7. teradataml/analytics/analytic_function_executor.py +299 -16
  8. teradataml/analytics/analytic_query_generator.py +92 -0
  9. teradataml/analytics/byom/__init__.py +3 -2
  10. teradataml/analytics/json_parser/metadata.py +13 -3
  11. teradataml/analytics/json_parser/utils.py +13 -6
  12. teradataml/analytics/meta_class.py +40 -1
  13. teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
  14. teradataml/analytics/sqle/__init__.py +11 -2
  15. teradataml/analytics/table_operator/__init__.py +4 -3
  16. teradataml/analytics/uaf/__init__.py +21 -2
  17. teradataml/analytics/utils.py +66 -1
  18. teradataml/analytics/valib.py +1 -1
  19. teradataml/automl/__init__.py +1502 -323
  20. teradataml/automl/custom_json_utils.py +139 -61
  21. teradataml/automl/data_preparation.py +247 -307
  22. teradataml/automl/data_transformation.py +32 -12
  23. teradataml/automl/feature_engineering.py +325 -86
  24. teradataml/automl/model_evaluation.py +44 -35
  25. teradataml/automl/model_training.py +122 -153
  26. teradataml/catalog/byom.py +8 -8
  27. teradataml/clients/pkce_client.py +1 -1
  28. teradataml/common/__init__.py +2 -1
  29. teradataml/common/constants.py +72 -0
  30. teradataml/common/deprecations.py +13 -7
  31. teradataml/common/garbagecollector.py +152 -120
  32. teradataml/common/messagecodes.py +11 -2
  33. teradataml/common/messages.py +4 -1
  34. teradataml/common/sqlbundle.py +26 -4
  35. teradataml/common/utils.py +225 -14
  36. teradataml/common/wrapper_utils.py +1 -1
  37. teradataml/context/context.py +82 -2
  38. teradataml/data/SQL_Fundamentals.pdf +0 -0
  39. teradataml/data/complaints_test_tokenized.csv +353 -0
  40. teradataml/data/complaints_tokens_model.csv +348 -0
  41. teradataml/data/covid_confirm_sd.csv +83 -0
  42. teradataml/data/dataframe_example.json +27 -1
  43. teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
  44. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
  45. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
  46. teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
  47. teradataml/data/docs/sqle/docs_17_20/Shap.py +203 -0
  48. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
  49. teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
  50. teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
  51. teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
  52. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  53. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
  54. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
  55. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
  56. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
  57. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
  58. teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
  59. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
  60. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
  61. teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
  62. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
  63. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  64. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
  65. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
  66. teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
  67. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
  68. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
  69. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
  70. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
  71. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
  72. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
  73. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
  74. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
  75. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
  76. teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
  77. teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
  78. teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
  79. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  80. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
  81. teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
  82. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
  83. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
  84. teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
  85. teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
  86. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  87. teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
  88. teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
  89. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
  90. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  91. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
  92. teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
  93. teradataml/data/dwt2d_dataTable.csv +65 -0
  94. teradataml/data/dwt_dataTable.csv +8 -0
  95. teradataml/data/dwt_filterTable.csv +3 -0
  96. teradataml/data/finance_data4.csv +13 -0
  97. teradataml/data/grocery_transaction.csv +19 -0
  98. teradataml/data/idwt2d_dataTable.csv +5 -0
  99. teradataml/data/idwt_dataTable.csv +8 -0
  100. teradataml/data/idwt_filterTable.csv +3 -0
  101. teradataml/data/interval_data.csv +5 -0
  102. teradataml/data/jsons/paired_functions.json +14 -0
  103. teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
  104. teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
  105. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
  106. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
  107. teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
  108. teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
  109. teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
  110. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
  111. teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
  112. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  113. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  114. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  115. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  116. teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
  117. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  118. teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
  119. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
  120. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
  121. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
  122. teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
  123. teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
  124. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
  125. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
  126. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
  127. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
  128. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
  129. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
  130. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
  131. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
  132. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
  133. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
  134. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
  135. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
  136. teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
  137. teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
  138. teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
  139. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
  140. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
  141. teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
  142. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
  143. teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
  144. teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
  145. teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
  146. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
  147. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
  148. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
  149. teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
  150. teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
  151. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
  152. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
  153. teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
  154. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
  155. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
  156. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
  157. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
  158. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
  159. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
  160. teradataml/data/load_example_data.py +8 -2
  161. teradataml/data/medical_readings.csv +101 -0
  162. teradataml/data/naivebayestextclassifier_example.json +1 -1
  163. teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
  164. teradataml/data/patient_profile.csv +101 -0
  165. teradataml/data/peppers.png +0 -0
  166. teradataml/data/real_values.csv +14 -0
  167. teradataml/data/sax_example.json +8 -0
  168. teradataml/data/scripts/deploy_script.py +1 -1
  169. teradataml/data/scripts/lightgbm/dataset.template +157 -0
  170. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
  171. teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
  172. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
  173. teradataml/data/scripts/sklearn/sklearn_fit.py +194 -160
  174. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
  175. teradataml/data/scripts/sklearn/sklearn_function.template +34 -16
  176. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
  177. teradataml/data/scripts/sklearn/sklearn_neighbors.py +1 -1
  178. teradataml/data/scripts/sklearn/sklearn_score.py +12 -3
  179. teradataml/data/scripts/sklearn/sklearn_transform.py +162 -24
  180. teradataml/data/star_pivot.csv +8 -0
  181. teradataml/data/target_udt_data.csv +8 -0
  182. teradataml/data/templates/open_source_ml.json +3 -1
  183. teradataml/data/teradataml_example.json +20 -1
  184. teradataml/data/timestamp_data.csv +4 -0
  185. teradataml/data/titanic_dataset_unpivoted.csv +19 -0
  186. teradataml/data/uaf_example.json +55 -1
  187. teradataml/data/unpivot_example.json +15 -0
  188. teradataml/data/url_data.csv +9 -0
  189. teradataml/data/vectordistance_example.json +4 -0
  190. teradataml/data/windowdfft.csv +16 -0
  191. teradataml/dataframe/copy_to.py +1 -1
  192. teradataml/dataframe/data_transfer.py +5 -3
  193. teradataml/dataframe/dataframe.py +1002 -201
  194. teradataml/dataframe/fastload.py +3 -3
  195. teradataml/dataframe/functions.py +867 -0
  196. teradataml/dataframe/row.py +160 -0
  197. teradataml/dataframe/setop.py +2 -2
  198. teradataml/dataframe/sql.py +840 -33
  199. teradataml/dataframe/window.py +1 -1
  200. teradataml/dbutils/dbutils.py +878 -34
  201. teradataml/dbutils/filemgr.py +48 -1
  202. teradataml/geospatial/geodataframe.py +1 -1
  203. teradataml/geospatial/geodataframecolumn.py +1 -1
  204. teradataml/hyperparameter_tuner/optimizer.py +13 -13
  205. teradataml/lib/aed_0_1.dll +0 -0
  206. teradataml/opensource/__init__.py +1 -1
  207. teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
  208. teradataml/opensource/_lightgbm.py +950 -0
  209. teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
  210. teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
  211. teradataml/opensource/sklearn/__init__.py +0 -1
  212. teradataml/opensource/sklearn/_sklearn_wrapper.py +1019 -574
  213. teradataml/options/__init__.py +9 -23
  214. teradataml/options/configure.py +42 -4
  215. teradataml/options/display.py +2 -2
  216. teradataml/plot/axis.py +4 -4
  217. teradataml/scriptmgmt/UserEnv.py +13 -9
  218. teradataml/scriptmgmt/lls_utils.py +77 -23
  219. teradataml/store/__init__.py +13 -0
  220. teradataml/store/feature_store/__init__.py +0 -0
  221. teradataml/store/feature_store/constants.py +291 -0
  222. teradataml/store/feature_store/feature_store.py +2223 -0
  223. teradataml/store/feature_store/models.py +1505 -0
  224. teradataml/store/vector_store/__init__.py +1586 -0
  225. teradataml/table_operators/Script.py +2 -2
  226. teradataml/table_operators/TableOperator.py +106 -20
  227. teradataml/table_operators/query_generator.py +3 -0
  228. teradataml/table_operators/table_operator_query_generator.py +3 -1
  229. teradataml/table_operators/table_operator_util.py +102 -56
  230. teradataml/table_operators/templates/dataframe_register.template +69 -0
  231. teradataml/table_operators/templates/dataframe_udf.template +63 -0
  232. teradataml/telemetry_utils/__init__.py +0 -0
  233. teradataml/telemetry_utils/queryband.py +52 -0
  234. teradataml/utils/dtypes.py +4 -2
  235. teradataml/utils/validators.py +34 -2
  236. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +311 -3
  237. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +240 -157
  238. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
  239. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
  240. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
@@ -0,0 +1,867 @@
1
+ import pandas as pd
2
+ from inspect import getsource
3
+ import re
4
+ from types import FunctionType
5
+ from teradataml.dbutils.filemgr import install_file, list_files, remove_file
6
+ from teradataml.options.configure import configure
7
+ import teradatasqlalchemy as tdsqlalchemy
8
+ from teradataml.utils.validators import _Validators
9
+ from teradataml.dataframe.sql import _SQLColumnExpression
10
+ from teradatasqlalchemy import VARCHAR, CLOB, CHAR
11
+ from teradataml.common.constants import TeradataTypes
12
+ from teradataml.common.utils import UtilFuncs
13
+ from teradataml.utils.dtypes import _Dtypes
14
+ from teradataml.dataframe.sql_interfaces import ColumnExpression
15
+ from teradataml.table_operators.table_operator_util import _TableOperatorUtils
16
+ from teradataml.utils.internal_buffer import _InternalBuffer
17
+ from teradataml.common.exceptions import TeradataMlException
18
+ from teradataml.common.messages import Messages
19
+ from teradataml.common.messagecodes import MessageCodes
20
+ from teradataml.scriptmgmt.lls_utils import get_env
21
+
22
def udf(user_function=None, returns=VARCHAR(1024), env_name=None, delimiter=',', quotechar=None):
    """
    DESCRIPTION:
        Creates a user defined function (UDF). The value returned is a callable;
        calling it with the UDF arguments (column names and/or literal values,
        as in the examples below) produces the ColumnExpression that can be
        passed to DataFrame.assign().

    PARAMETERS:
        user_function:
            Required Argument when udf() is called directly or used as a bare
            decorator (@udf). It is omitted when udf is used as a decorator
            with arguments, for example @udf(returns=INTEGER()).
            Specifies the user defined function to create a column for
            teradataml DataFrame.
            Types: function
            Note:
                1. Lambda functions are not supported.

        returns:
            Optional Argument.
            Specifies the output column type.
            Types: teradatasqlalchemy types object
            Default: VARCHAR(1024)

        env_name:
            Optional Argument.
            Specifies the name of the remote user environment or an object of
            class UserEnv for VantageCloud Lake.
            Types: str or object of class UserEnv.
            Note:
                * One can set up a user environment with required packages using teradataml
                  Open Analytics APIs. If no ``env_name`` is provided, udf uses the default
                  ``openml_env`` user environment. This default environment has latest Python
                  and scikit-learn versions that are supported by Open Analytics Framework
                  at the time of creating environment.

        delimiter:
            Optional Argument.
            Specifies a delimiter to use when reading columns from a row and
            writing result columns.
            Default value: ','
            Types: str with one character
            Notes:
                * This argument cannot be same as "quotechar" argument.
                * This argument cannot be a newline character.
                * Use a different delimiter if categorical columns in the data contain
                  a character same as the delimiter.

        quotechar:
            Optional Argument.
            Specifies a character that forces input of the user function
            to be quoted using this specified character.
            Using this argument enables the Advanced SQL Engine to
            distinguish between NULL fields and empty strings.
            A string with length zero is quoted, while NULL fields are not.
            Default value: None
            Types: str with one character
            Notes:
                * This argument cannot be same as "delimiter" argument.
                * This argument cannot be a newline character.

    RETURNS:
        ColumnExpression

    RAISES:
        TeradataMLException

    NOTES:
        1. While working on date and time data types one must format these to supported formats.
           (See Requisite Input and Output Structures in Open Analytics Framework for more details.)
        2. Required packages to run the user defined function must be installed in remote user
           environment using install_lib function of UserEnv class. Import statements of these
           packages should be inside the user defined function itself.
        3. One can't call a regular function defined outside the udf from the user defined function.
           The function definition and call must be inside the udf. Look at Example 9 to understand more.

    EXAMPLES:
        # Load the data to run the example.
        >>> load_example_data("dataframe", "sales")

        # Create a DataFrame on 'sales' table.
        >>> df = DataFrame("sales")
        >>> df
                      Feb    Jan    Mar    Apr    datetime
        accounts
        Yellow Inc   90.0    NaN    NaN    NaN  04/01/2017
        Jones LLC   200.0  150.0  140.0  180.0  04/01/2017
        Red Inc     200.0  150.0  140.0    NaN  04/01/2017
        Alpha Co    210.0  200.0  215.0  250.0  04/01/2017
        Blue Inc     90.0   50.0   95.0  101.0  04/01/2017
        Orange Inc  210.0    NaN    NaN  250.0  04/01/2017

        # Example 1: Create the user defined function to get the values in 'accounts'
        #            to upper case without passing returns argument.
        >>> from teradataml.dataframe.functions import udf
        >>> @udf
        ... def to_upper(s):
        ...     if s is not None:
        ...         return s.upper()
        >>>
        # Assign the Column Expression returned by user defined function
        # to the DataFrame.
        >>> res = df.assign(upper_stats = to_upper('accounts'))
        >>> res
                      Feb    Jan    Mar    Apr  datetime  upper_stats
        accounts
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04     ALPHA CO
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04     BLUE INC
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04   YELLOW INC
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04    JONES LLC
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04   ORANGE INC
        Red Inc     200.0  150.0  140.0    NaN  17/01/04      RED INC
        >>>

        # Example 2: Create a user defined function to add length of string values in column
        #            'accounts' with column 'Feb' and store the result in Integer type column.
        >>> from teradatasqlalchemy.types import INTEGER
        >>> @udf(returns=INTEGER())
        ... def sum(x, y):
        ...     return len(x)+y
        >>>
        # Assign the Column Expression returned by user defined function
        # to the DataFrame.
        >>> res = df.assign(len_sum = sum('accounts', 'Feb'))
        >>> res
                      Feb    Jan    Mar    Apr  datetime  len_sum
        accounts
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04      218
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04       98
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04      100
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04      209
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04      220
        Red Inc     200.0  150.0  140.0    NaN  17/01/04      207
        >>>

        # Example 3: Create a function to get the values in 'accounts' to upper case
        #            and pass it to udf as parameter to create a user defined function.
        >>> from teradataml.dataframe.functions import udf
        >>> def to_upper(s):
        ...     if s is not None:
        ...         return s.upper()
        >>> upper_case = udf(to_upper)
        >>>
        # Assign the Column Expression returned by user defined function
        # to the DataFrame.
        >>> res = df.assign(upper_stats = upper_case('accounts'))
        >>> res
                      Feb    Jan    Mar    Apr  datetime  upper_stats
        accounts
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04     ALPHA CO
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04     BLUE INC
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04   YELLOW INC
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04    JONES LLC
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04   ORANGE INC
        Red Inc     200.0  150.0  140.0    NaN  17/01/04      RED INC
        >>>

        # Example 4: Create a user defined function to add 4 to the 'datetime' column
        #            and store the result in DATE type column.
        >>> from teradatasqlalchemy.types import DATE
        >>> import datetime
        >>> @udf(returns=DATE())
        ... def add_date(x, y):
        ...     return (datetime.datetime.strptime(x, "%y/%m/%d")+datetime.timedelta(y)).strftime("%y/%m/%d")
        >>>
        # Assign the Column Expression returned by user defined function
        # to the DataFrame.
        >>> res = df.assign(new_date = add_date('datetime', 4))
        >>> res
                      Feb    Jan    Mar    Apr  datetime  new_date
        accounts
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04  17/01/08
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04  17/01/08
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04  17/01/08
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04  17/01/08
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04  17/01/08
        Red Inc     200.0  150.0  140.0    NaN  17/01/04  17/01/08

        # Example 5: Create a user defined function to add 4 to the 'datetime' column
        #            without passing returns argument.
        >>> from teradatasqlalchemy.types import DATE
        >>> import datetime
        >>> @udf
        ... def add_date(x, y):
        ...     return (datetime.datetime.strptime(x, "%y/%m/%d")+datetime.timedelta(y))
        >>>
        # Assign the Column Expression returned by user defined function
        # to the DataFrame.
        >>> res = df.assign(new_date = add_date('datetime', 4))
        >>> res
                      Feb    Jan    Mar    Apr  datetime             new_date
        accounts
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04  2017-01-08 00:00:00
        Red Inc     200.0  150.0  140.0    NaN  17/01/04  2017-01-08 00:00:00
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04  2017-01-08 00:00:00
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04  2017-01-08 00:00:00
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04  2017-01-08 00:00:00
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04  2017-01-08 00:00:00

        # Example 6: Create two user defined functions, 'to_upper' and 'sum':
        #            'to_upper' to get the values in 'accounts' to upper case and
        #            'sum' to add length of string values in column 'accounts'
        #            with column 'Feb' and store the result in Integer type column.
        >>> @udf
        ... def to_upper(s):
        ...     if s is not None:
        ...         return s.upper()
        >>>
        >>> from teradatasqlalchemy.types import INTEGER
        >>> @udf(returns=INTEGER())
        ... def sum(x, y):
        ...     return len(x)+y
        >>>
        # Assign both the Column Expressions returned by user defined functions
        # to the DataFrame.
        >>> res = df.assign(upper_stats = to_upper('accounts'), len_sum = sum('accounts', 'Feb'))
        >>> res
                      Feb    Jan    Mar    Apr  datetime  upper_stats  len_sum
        accounts
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04     BLUE INC       98
        Red Inc     200.0  150.0  140.0    NaN  17/01/04      RED INC      207
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04   YELLOW INC      100
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04    JONES LLC      209
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04   ORANGE INC      220
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04     ALPHA CO      218
        >>>

        # Example 7: Convert the values in 'accounts' column to upper case using a user
        #            defined function on Vantage Cloud Lake.
        # Create a Python 3.10.5 environment with given name and description in Vantage.
        >>> env = create_env('test_udf', 'python_3.10.5', 'Test environment for UDF')
        User environment 'test_udf' created.
        >>>
        # Create a user defined function 'to_upper' to get the values in upper case
        # and pass the user env to run it on.
        >>> from teradataml.dataframe.functions import udf
        >>> @udf(env_name = env)
        ... def to_upper(s):
        ...     if s is not None:
        ...         return s.upper()
        >>>
        # Assign the Column Expression returned by user defined function
        # to the DataFrame.
        >>> df.assign(upper_stats = to_upper('accounts'))
                      Feb    Jan    Mar    Apr  datetime  upper_stats
        accounts
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04     ALPHA CO
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04     BLUE INC
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04   YELLOW INC
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04    JONES LLC
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04   ORANGE INC
        Red Inc     200.0  150.0  140.0    NaN  17/01/04      RED INC

        # Example 8: Create a user defined function to add 4 to the 'datetime' column
        #            and store the result in DATE type column on Vantage Cloud Lake.
        >>> from teradatasqlalchemy.types import DATE
        >>> import datetime
        >>> @udf(returns=DATE())
        ... def add_date(x, y):
        ...     return (datetime.datetime.strptime(x, "%Y-%m-%d")+datetime.timedelta(y)).strftime("%Y-%m-%d")
        >>>
        # Assign the Column Expression returned by user defined function
        # to the DataFrame.
        >>> res = df.assign(new_date = add_date('datetime', 4))
        >>> res
                      Feb    Jan    Mar    Apr  datetime  new_date
        accounts
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04  17/01/08
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04  17/01/08
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04  17/01/08
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04  17/01/08
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04  17/01/08
        Red Inc     200.0  150.0  140.0    NaN  17/01/04  17/01/08
        >>>

        # Example 9: Define a function 'inner_add_date' inside the udf to create a
        #            date object by passing year, month, and day and add 1 to that date.
        #            Call this function inside the user defined function.
        >>> @udf
        ... def add_date(y,m,d):
        ...     import datetime
        ...     def inner_add_date(y,m,d):
        ...         return datetime.date(y,m,d) + datetime.timedelta(1)
        ...     return inner_add_date(y,m,d)

        # Assign the Column Expression returned by user defined function
        # to the DataFrame.
        >>> res = df.assign(new_date = add_date(2021, 10, 5))
        >>> res
                      Feb    Jan    Mar    Apr  datetime    new_date
        accounts
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04  2021-10-06
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04  2021-10-06
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04  2021-10-06
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04  2021-10-06
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04  2021-10-06
        Red Inc     200.0  150.0  140.0    NaN  17/01/04  2021-10-06
        >>>
    """

    # Only "returns" is validated here; it must be one of the Teradata types.
    _Validators._validate_function_arguments(
        [["returns", returns, False, TeradataTypes.TD_ALL_TYPES.value]])

    def _bind(target):
        # Build the callable handed back to the user: it captures "target"
        # together with the udf options and defers creation of the column
        # expression until the UDF arguments are supplied.
        #
        # Do not apply functools.wraps to func_: register() in this module
        # recognises udf-created callables by the "udf.<locals>" fragment in
        # their __qualname__, which wraps() would overwrite.
        def func_(*args):
            return _SQLColumnExpression(expression=None,
                                        udf=target,
                                        udf_type=returns,
                                        udf_args=args,
                                        env_name=env_name,
                                        delimiter=delimiter,
                                        quotechar=quotechar)
        return func_

    # Notation: @udf(returns=INTEGER()) -- no function yet, so hand back the
    # decorator that will receive it.
    if user_function is None:
        return _bind

    # Notation: @udf or udf(function) -- the function is already known.
    return _bind(user_function)
336
+
337
+
338
def register(name, user_function, returns=VARCHAR(1024)):
    """
    DESCRIPTION:
        Registers a user defined function (UDF).

    PARAMETERS:
        name:
            Required Argument.
            Specifies the name of the user defined function to register.
            Types: str

        user_function:
            Required Argument.
            Specifies the user defined function to create a column for
            teradataml DataFrame. This can be a plain Python function, a
            callable created by udf, or a ColumnExpression returned by a udf.
            Types: function, udf

        returns:
            Optional Argument.
            Specifies the output column type used to register the user defined function.
            Note:
                * If 'user_function' is a udf, then return type of the udf is used as return type
                  of the registered user defined function and this argument is ignored.
            Default Value: VARCHAR(1024)
            Types: teradatasqlalchemy types object

    RETURNS:
        None

    RAISES:
        TeradataMLException, TypeError

    EXAMPLES:
        # Example 1: Register the user defined function to get the values upper case.
        >>> from teradataml.dataframe.functions import udf, register
        >>> @udf
        ... def to_upper(s):
        ...     if s is not None:
        ...         return s.upper()
        >>>
        # Register the created user defined function.
        >>> register("upper_val", to_upper)
        >>>

        # Example 2: Register a user defined function to get factorial of a number and
        #            store the result in Integer type column. The return type is
        #            declared on the udf itself, because register uses the return
        #            type of a udf in place of its own "returns" argument.
        >>> from teradataml.dataframe.functions import udf, register
        >>> from teradatasqlalchemy.types import INTEGER
        >>> @udf(returns=INTEGER())
        ... def factorial(n):
        ...     import math
        ...     return math.factorial(n)
        >>>
        # Register the created user defined function.
        >>> register("fact", factorial)
        >>>

        # Example 3: Register a Python function to get the values upper case.
        >>> from teradataml.dataframe.functions import register
        >>> def to_upper(s):
        ...     return s.upper()
        >>>
        # Register the created Python function.
        >>> register("upper_val", to_upper)
        >>>
    """

    # Validate the arguments ("returns" first, then "name").
    _Validators._validate_function_arguments([
        ["returns", returns, True, TeradataTypes.TD_ALL_TYPES.value],
        ["name", name, False, str],
    ])

    # Work out which Python callable has to be shipped. Three inputs are
    # accepted: a ColumnExpression returned by a udf, a callable created by
    # udf() (recognised by "udf.<locals>" in its qualified name), or a plain
    # Python function.
    is_column_expression = isinstance(user_function, ColumnExpression)
    if not is_column_expression and "udf.<locals>" not in user_function.__qualname__:
        # Plain Python function: register it as-is with the given "returns".
        functions = [user_function]
    else:
        if not is_column_expression:
            # Callable created by udf(): invoke it without arguments to get
            # the column expression carrying the wrapped function and type.
            user_function = user_function()
        functions = [user_function._udf]
        # The type recorded on the udf overrides the "returns" argument.
        returns = user_function._type

    # Create a dictionary of user defined function name to return type.
    return_types = {name: _create_return_type(returns)}

    exec_mode = 'REMOTE' if UtilFuncs._is_lake() else 'IN-DB'

    tbl_operators = _TableOperatorUtils([],
                                        None,
                                        "register",
                                        functions,
                                        exec_mode,
                                        chunk_size=None,
                                        num_rows=1,
                                        delimiter=None,
                                        quotechar=None,
                                        data_partition_column=None,
                                        data_hash_column=None,
                                        style="csv",
                                        returns=return_types,
                                        )

    # Install the generated script file: through the database file manager
    # for 'IN-DB' execution, otherwise into the remote user environment.
    if exec_mode != 'REMOTE':
        install_file(file_identifier=tbl_operators.script_base_name,
                     file_path=tbl_operators.script_path,
                     suppress_output=True, replace=True)
        return

    _Validators._check_auth_token("register")
    env_name = UtilFuncs._get_env_name()
    # This is a module-level function, so "__env" is not name-mangled; the
    # attribute is stored on the object literally as "__env".
    tbl_operators.__env = get_env(env_name)
    tbl_operators.__env.install_file(tbl_operators.script_path, suppress_output=True, replace=True)
455
+
456
+
457
def call_udf(udf_name, func_args=(), **kwargs):
    """
    DESCRIPTION:
        Call a registered user defined function (UDF).

    PARAMETERS:
        udf_name:
            Required Argument.
            Specifies the name of the registered user defined function.
            Types: str

        func_args:
            Optional Argument.
            Specifies the arguments to pass to the registered UDF.
            Default Value: ()
            Types: tuple

        delimiter:
            Optional Argument.
            Specifies a delimiter to use when reading columns from a row and
            writing result columns.
            Notes:
                * This argument cannot be same as "quotechar" argument.
                * This argument cannot be a newline character.
                * Use a different delimiter if categorical columns in the data
                  contain a character same as the delimiter.
            Default Value: ','
            Types: one character string

        quotechar:
            Optional Argument.
            Specifies a character that forces input of the user function
            to be quoted using this specified character.
            Using this argument enables the Analytics Database to
            distinguish between NULL fields and empty strings.
            A string with length zero is quoted, while NULL fields are not.
            Notes:
                * This argument cannot be same as "delimiter" argument.
                * This argument cannot be a newline character.
            Default Value: None
            Types: one character string

    RETURNS:
        ColumnExpression

    RAISES:
        TypeError - when a keyword argument other than "delimiter" or
                    "quotechar" is passed.
        TeradataMlException - when no UDF, or more than one UDF, is registered
                              with the given name, or when the return type
                              cannot be derived from the registered file name.

    EXAMPLES:
        # Load the data to run the example.
        >>> load_example_data("dataframe", "sales")

        # Create a DataFrame on 'sales' table.
        >>> import random
        >>> dfsales = DataFrame("sales")
        >>> df = dfsales.assign(id = case([(dfsales.accounts == 'Alpha Co', random.randrange(1, 9)),
        ...                                (dfsales.accounts == 'Blue Inc', random.randrange(1, 9)),
        ...                                (dfsales.accounts == 'Jones LLC', random.randrange(1, 9)),
        ...                                (dfsales.accounts == 'Orange Inc', random.randrange(1, 9)),
        ...                                (dfsales.accounts == 'Yellow Inc', random.randrange(1, 9)),
        ...                                (dfsales.accounts == 'Red Inc', random.randrange(1, 9))]))

        # Example 1: Register and Call the user defined function to get the values upper case.
        >>> from teradataml.dataframe.functions import udf, register, call_udf
        >>> @udf
        ... def to_upper(s):
        ...     if s is not None:
        ...         return s.upper()
        >>>
        # Register the created user defined function with name "upper".
        >>> register("upper", to_upper)
        >>>
        # Call the user defined function registered with name "upper" and assign the
        # ColumnExpression returned to the DataFrame.
        >>> res = df.assign(upper_col = call_udf("upper", ('accounts',)))
        >>> res
                      Feb    Jan    Mar    Apr  datetime  id   upper_col
        accounts
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04   4  YELLOW INC
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04   2    ALPHA CO
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04   5   JONES LLC
        Red Inc     200.0  150.0  140.0    NaN  17/01/04   3     RED INC
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04   1    BLUE INC
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04   4  ORANGE INC
        >>>

        # Example 2: Register and Call user defined function to get factorial of a number
        #            and store the result in Integer type column.
        >>> from teradataml.dataframe.functions import udf, register
        >>> from teradatasqlalchemy.types import INTEGER
        >>> @udf(returns = INTEGER())
        ... def factorial(n):
        ...     import math
        ...     return math.factorial(n)
        >>>
        # Register the created user defined function with name "fact".
        >>> register("fact", factorial)
        >>>
        # Call the user defined function registered with name "fact" and assign the
        # ColumnExpression returned to the DataFrame.
        >>> res = df.assign(fact_col = call_udf("fact", ('id',)))
        >>> res
                      Feb    Jan    Mar    Apr  datetime  id  fact_col
        accounts
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04   5       120
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04   4        24
        Red Inc     200.0  150.0  140.0    NaN  17/01/04   3         6
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04   1         1
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04   2         2
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04   4        24
        >>>

        # Example 3: Register and Call the Python function to get the values upper case.
        >>> from teradataml.dataframe.functions import register, call_udf
        >>> from teradatasqlalchemy.types import VARCHAR
        >>> def to_upper(s):
        ...     return s.upper()
        >>>
        # Register the created Python function with name "upper".
        >>> register("upper", to_upper, returns = VARCHAR(1024))
        >>>
        # Call the Python function registered with name "upper" and assign the
        # ColumnExpression returned to the DataFrame.
        >>> res = df.assign(upper_col = call_udf("upper", ('accounts',)))
        >>> res
                      Feb    Jan    Mar    Apr  datetime  id   upper_col
        accounts
        Yellow Inc   90.0    NaN    NaN    NaN  17/01/04   4  YELLOW INC
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04   2    ALPHA CO
        Jones LLC   200.0  150.0  140.0  180.0  17/01/04   5   JONES LLC
        Red Inc     200.0  150.0  140.0    NaN  17/01/04   3     RED INC
        Blue Inc     90.0   50.0   95.0  101.0  17/01/04   1    BLUE INC
        Orange Inc  210.0    NaN    NaN  250.0  17/01/04   4  ORANGE INC
        >>>
    """
    env = None
    delimiter = kwargs.pop('delimiter', ',')
    quotechar = kwargs.pop('quotechar', None)
    # Anything still left in kwargs is not a supported argument.
    unknown_args = list(kwargs.keys())
    if len(unknown_args) > 0:
        raise TypeError(Messages.get_message(MessageCodes.UNKNOWN_ARGUMENT,
                                             "call_udf", unknown_args[0]))

    if UtilFuncs._is_lake():
        _Validators._check_auth_token("call_udf")
        env = get_env(UtilFuncs._get_env_name())
        file_list = env.files
        if file_list is None:
            raise TeradataMlException(Messages.get_message(
                MessageCodes.FUNC_EXECUTION_FAILED, "'call_udf'", "No UDF is registered with the name '{}'.".format(udf_name)),
                MessageCodes.FUNC_EXECUTION_FAILED)
        file_column = 'File'
    else:
        file_list = list_files().to_pandas()
        file_column = 'Files'

    # Get the script name from the environment that starts with
    # tdml_udf_name_<udf_name>_udf_type_.
    script_prefix = 'tdml_udf_name_{}_udf_type_'.format(udf_name)
    script_file = [file for file in file_list[file_column] if file.startswith(script_prefix)]
    if len(script_file) != 1:
        raise TeradataMlException(Messages.get_message(
            MessageCodes.FUNC_EXECUTION_FAILED, "'call_udf'", "Multiple UDFs or no UDF is registered with the name '{}'.".format(udf_name)),
            MessageCodes.FUNC_EXECUTION_FAILED)

    script_name = script_file[0]
    # Get the return type from the script name. The UDF name is escaped so
    # that regex metacharacters in it (e.g. '.', '+') are matched literally.
    x = re.search(r"tdml_udf_name_{}_udf_type_([A-Z_]+)(\d*)_register".format(re.escape(udf_name)), script_name)
    if x is None:
        # The file name has the expected prefix but not the expected
        # <TYPE><length>_register layout; fail with a clear message instead
        # of an AttributeError on 'None'.
        raise TeradataMlException(Messages.get_message(
            MessageCodes.FUNC_EXECUTION_FAILED, "'call_udf'",
            "Unable to determine the return type of UDF '{}' from file '{}'.".format(udf_name, script_name)),
            MessageCodes.FUNC_EXECUTION_FAILED)
    returns = getattr(tdsqlalchemy, x.group(1))
    # If the return type has length, get the length from the script name and
    # pass it as an integer, which is how a length is normally supplied to a
    # type constructor.
    returns = returns(int(x.group(2))) if x.group(2) else returns()

    return _SQLColumnExpression(expression=None, udf_args=func_args, udf_script=script_name, udf_type=returns,
                                delimiter=delimiter, quotechar=quotechar, env_name=env)
628
+
629
+
630
def list_udfs(show_files=False):
    """
    DESCRIPTION:
        List all the UDFs registered using 'register()' function.

    PARAMETERS:
        show_files:
            Optional Argument.
            Specifies whether to show file names or not.
            Default Value: False
            Types: bool

    RETURNS:
        Pandas DataFrame containing files and their details, or
        None if no registered UDF file is found.

    RAISES:
        TeradataMlException.

    EXAMPLES:
        # Example 1: Register the user defined function to get the values in lower case,
        #            then list all the UDFs registered.
        >>> @udf
        ... def to_lower(s):
        ...     if s is not None:
        ...         return s.lower()

        # Register the created user defined function.
        >>> register("lower", to_lower)

        # List all the UDFs registered
        >>> list_udfs(True)
        id      name  return_type                                             file_name
         0     lower  VARCHAR1024  tdml_udf_name_lower_udf_type_VARCHAR1024_register.py
         1     upper  VARCHAR1024  tdml_udf_name_upper_udf_type_VARCHAR1024_register.py
         2  add_date         DATE     tdml_udf_name_add_date_udf_type_DATE_register.py
         3  sum_cols      INTEGER  tdml_udf_name_sum_cols_udf_type_INTEGER_register.py
        >>>
    """

    if UtilFuncs._is_lake():
        _Validators._check_auth_token("list_udfs")
        env_name = UtilFuncs._get_env_name()
        _df = get_env(env_name).files
        if _df is not None:
            # Rename the column on a new DataFrame rather than in place, so
            # the object handed back by the environment is left untouched
            # (call_udf() reads the 'File' column from the same property).
            _df = _df.rename(columns={'File': 'Files'})
            _df = _df[_df['Files'].str.startswith('tdml_udf_') & _df['Files'].str.endswith('_register.py')][['Files']]
            if len(_df) == 0:
                print("No files found in remote user environment {}.".format(env_name))
            else:
                return _create_udf_dataframe(_df, show_files)

    else:
        _df = list_files()
        # Filter on the teradataml DataFrame first, then bring only the
        # matching rows to the client as a pandas DataFrame.
        _df = _df[_df['Files'].startswith('tdml_udf_') & _df['Files'].endswith('_register.py')].to_pandas()
        if len(_df) == 0:
            print("No files found in Vantage")
        else:
            return _create_udf_dataframe(_df, show_files)
690
+
691
def _create_udf_dataframe(pandas_df, show_files=False):
    """
    DESCRIPTION:
        Internal function to return pandas DataFrame with
        column names "id", "name", "return_type" and, when requested,
        "file_name".

    PARAMETERS:
        pandas_df:
            Required Argument.
            Specifies the pandas DataFrame whose first column holds the
            registered file names, for example
            "tdml_udf_name_fact_udf_type_VARCHAR1024_register.py".
            Types: pandas DataFrame

        show_files:
            Optional Argument.
            Specifies whether to show file names or not.
            Default Value: False
            Types: bool

    RETURNS:
        pandas DataFrame.
        File names that do not follow the
        "tdml_udf_name_<name>_udf_type_<type>_register.py" layout are skipped.

    EXAMPLES:
        >>> _create_udf_dataframe(pandas_dataframe)

    """
    prefix = "tdml_udf_name_"
    separator = "_udf_type_"
    suffix = "_register.py"

    _data = {"id": [], "name": [], "return_type": []}
    if show_files:
        _data["file_name"] = []

    for _row in pandas_df.values.tolist():
        file_name = _row[0]
        # Callers filter only on a looser 'tdml_udf_' prefix, so make sure the
        # full naming layout is present before slicing the name apart.
        if not (file_name.startswith(prefix) and file_name.endswith(suffix)):
            continue
        # Extract udf name and type:
        # "tdml_udf_name_fact_udf_type_VARCHAR1024_register.py" -> 'fact', 'VARCHAR1024'.
        # Splitting on the LAST separator keeps UDF names that themselves
        # contain "_udf_type_" intact.
        udf_name, found, return_type = file_name[len(prefix):-len(suffix)].rpartition(separator)
        if not found:
            continue
        # Ids stay contiguous even when some files were skipped.
        _data["id"].append(len(_data["id"]))
        _data["name"].append(udf_name)
        _data["return_type"].append(return_type)
        if show_files:
            _data["file_name"].append(file_name)
    return pd.DataFrame(_data)
729
+
730
+
731
def deregister(name, returns=None):
    """
    DESCRIPTION:
        Deregisters a user defined function (UDF) by removing the file(s)
        that were installed for it by 'register()'.

    PARAMETERS:
        name:
            Required Argument.
            Specifies the name of the user defined function to deregister.
            Types: str

        returns:
            Optional Argument.
            Specifies the return type of the user defined function to
            deregister. When not specified, every UDF registered with
            "name" is deregistered, whatever its return type.
            Types: teradatasqlalchemy types object

    RETURNS:
        None

    RAISES:
        TeradataMlException - when no registered UDF matches the arguments.

    EXAMPLES:
        # Example 1: Register the user defined function to get the values in lower case,
        #            then deregister it.
        >>> @udf
        ... def to_lower(s):
        ...     if s is not None:
        ...         return s.lower()

        # Register the created user defined function.
        >>> register("lower", to_lower)

        # List all the UDFs registered
        >>> list_udfs(True)
        id      name  return_type                                             file_name
         0     lower  VARCHAR1024  tdml_udf_name_lower_udf_type_VARCHAR1024_register.py
         1     upper  VARCHAR1024  tdml_udf_name_upper_udf_type_VARCHAR1024_register.py
         2  add_date         DATE     tdml_udf_name_add_date_udf_type_DATE_register.py
         3  sum_cols      INTEGER  tdml_udf_name_sum_cols_udf_type_INTEGER_register.py
        >>>

        # Deregister the created user defined function.
        >>> deregister("lower")

        # List all the UDFs registered
        >>> list_udfs(True)
        id      name  return_type                                             file_name
         0     upper  VARCHAR1024  tdml_udf_name_upper_udf_type_VARCHAR1024_register.py
         1  add_date         DATE     tdml_udf_name_add_date_udf_type_DATE_register.py
         2  sum_cols      INTEGER  tdml_udf_name_sum_cols_udf_type_INTEGER_register.py
        >>>

        # Example 2: Deregister only the UDF registered with the specified return type.
        >>> from teradatasqlalchemy import FLOAT
        >>> @udf(returns=FLOAT())
        ... def sum(x, y):
        ...     return len(x) + y

        # Register the created user defined function.
        >>> register("sum", sum)

        # List all the UDFs registered
        >>> list_udfs(True)
        id  name  return_type                                      file_name
         0   sum        FLOAT    tdml_udf_name_sum_udf_type_FLOAT_register.py
         1   sum      INTEGER  tdml_udf_name_sum_udf_type_INTEGER_register.py
        >>>

        # Deregister the user defined function registered with FLOAT return type.
        >>> deregister("sum", FLOAT())

        # List all the UDFs registered
        >>> list_udfs(True)
        id  name  return_type                                      file_name
         0   sum      INTEGER  tdml_udf_name_sum_udf_type_INTEGER_register.py
        >>>
    """
    def _udf_missing_error():
        # Single place that builds the "does not exist" failure.
        return TeradataMlException(Messages.get_message(MessageCodes.FUNC_EXECUTION_FAILED,
                                                        "'deregister'",
                                                        f"UDF '{name}' does not exist."),
                                   MessageCodes.FUNC_EXECUTION_FAILED)

    registered = list_udfs(show_files=True)
    # list_udfs() hands back None when there is nothing registered at all.
    if registered is None:
        raise _udf_missing_error()

    # Without a return type, match on the name part only; with one, match on
    # the complete file name.
    if returns is not None:
        wanted = f'tdml_udf_name_{name}_udf_type_{_create_return_type(returns)}_register.py'
    else:
        wanted = f'tdml_udf_name_{name}_udf_type_'
    matches = registered[registered['file_name'].str.startswith(wanted)]

    if len(matches) == 0:
        raise _udf_missing_error()

    script_files = matches['file_name'].tolist()

    # Remove the file on the lake/enterprise environment.
    if not UtilFuncs._is_lake():
        for script_file in script_files:
            # The file identifier is the file name without its ".py" extension.
            remove_file(script_file[:-3], force_remove=True, suppress_output=True)
    else:
        env = get_env(UtilFuncs._get_env_name())
        for script_file in script_files:
            env.remove_file(script_file, suppress_output=True)
838
+
839
+
840
def _create_return_type(returns):
    """
    DESCRIPTION:
        Internal function to build the string form of the return type
        "returns" that is embedded in the registered file name.

    PARAMETERS:
        returns:
            Required Argument.
            Specifies the teradatasqlalchemy types object.
            Types: teradatasqlalchemy types object

    RETURNS:
        string

    EXAMPLES:
        >>> _create_return_type(VARCHAR(1024))
        'VARCHAR1024'
    """
    type_name = str(returns)
    # Only character types carry a length in the file name, and only when a
    # length was actually provided.
    if isinstance(returns, (VARCHAR, CLOB, CHAR)) and returns.length:
        type_name = type_name + str(returns.length)
    # Spaces in the type string become underscores.
    return type_name.replace(" ", "_")