teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic. Click here for more details.

Files changed (126) hide show
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +315 -2
  3. teradataml/__init__.py +4 -0
  4. teradataml/_version.py +1 -1
  5. teradataml/analytics/analytic_function_executor.py +95 -8
  6. teradataml/analytics/byom/__init__.py +1 -1
  7. teradataml/analytics/json_parser/metadata.py +12 -3
  8. teradataml/analytics/json_parser/utils.py +7 -2
  9. teradataml/analytics/sqle/__init__.py +5 -1
  10. teradataml/analytics/table_operator/__init__.py +1 -1
  11. teradataml/analytics/uaf/__init__.py +1 -1
  12. teradataml/analytics/utils.py +4 -0
  13. teradataml/analytics/valib.py +18 -4
  14. teradataml/automl/__init__.py +51 -6
  15. teradataml/automl/data_preparation.py +59 -35
  16. teradataml/automl/data_transformation.py +58 -33
  17. teradataml/automl/feature_engineering.py +27 -12
  18. teradataml/automl/model_training.py +73 -46
  19. teradataml/common/constants.py +88 -29
  20. teradataml/common/garbagecollector.py +2 -1
  21. teradataml/common/messagecodes.py +19 -3
  22. teradataml/common/messages.py +6 -1
  23. teradataml/common/sqlbundle.py +64 -12
  24. teradataml/common/utils.py +246 -47
  25. teradataml/common/warnings.py +11 -0
  26. teradataml/context/context.py +161 -27
  27. teradataml/data/amazon_reviews_25.csv +26 -0
  28. teradataml/data/byom_example.json +11 -0
  29. teradataml/data/dataframe_example.json +18 -2
  30. teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
  31. teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
  32. teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
  33. teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
  34. teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
  35. teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
  36. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
  37. teradataml/data/docs/sqle/docs_17_20/Shap.py +34 -6
  38. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
  39. teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
  40. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  41. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  42. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
  43. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
  44. teradataml/data/hnsw_alter_data.csv +5 -0
  45. teradataml/data/hnsw_data.csv +10 -0
  46. teradataml/data/jsons/byom/h2opredict.json +1 -1
  47. teradataml/data/jsons/byom/onnxembeddings.json +266 -0
  48. teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
  49. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
  50. teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
  51. teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
  52. teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
  53. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  54. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  55. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  56. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  57. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  58. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
  59. teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
  60. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
  61. teradataml/data/medical_readings.csv +101 -0
  62. teradataml/data/patient_profile.csv +101 -0
  63. teradataml/data/scripts/lightgbm/dataset.template +157 -0
  64. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
  65. teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
  66. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
  67. teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
  68. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
  69. teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
  70. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
  71. teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
  72. teradataml/data/target_udt_data.csv +8 -0
  73. teradataml/data/templates/open_source_ml.json +3 -2
  74. teradataml/data/teradataml_example.json +8 -0
  75. teradataml/data/vectordistance_example.json +4 -0
  76. teradataml/dataframe/copy_to.py +8 -3
  77. teradataml/dataframe/data_transfer.py +11 -1
  78. teradataml/dataframe/dataframe.py +1049 -285
  79. teradataml/dataframe/dataframe_utils.py +152 -20
  80. teradataml/dataframe/functions.py +578 -35
  81. teradataml/dataframe/setop.py +11 -6
  82. teradataml/dataframe/sql.py +185 -16
  83. teradataml/dbutils/dbutils.py +1049 -115
  84. teradataml/dbutils/filemgr.py +48 -1
  85. teradataml/hyperparameter_tuner/optimizer.py +12 -1
  86. teradataml/lib/aed_0_1.dll +0 -0
  87. teradataml/opensource/__init__.py +1 -1
  88. teradataml/opensource/_base.py +1466 -0
  89. teradataml/opensource/_class.py +464 -0
  90. teradataml/opensource/{sklearn/constants.py → _constants.py} +21 -14
  91. teradataml/opensource/_lightgbm.py +949 -0
  92. teradataml/opensource/_sklearn.py +1008 -0
  93. teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +5 -6
  94. teradataml/options/__init__.py +54 -38
  95. teradataml/options/configure.py +131 -27
  96. teradataml/options/display.py +13 -2
  97. teradataml/plot/axis.py +47 -8
  98. teradataml/plot/figure.py +33 -0
  99. teradataml/plot/plot.py +63 -13
  100. teradataml/scriptmgmt/UserEnv.py +5 -5
  101. teradataml/scriptmgmt/lls_utils.py +130 -40
  102. teradataml/store/__init__.py +12 -0
  103. teradataml/store/feature_store/__init__.py +0 -0
  104. teradataml/store/feature_store/constants.py +291 -0
  105. teradataml/store/feature_store/feature_store.py +2318 -0
  106. teradataml/store/feature_store/models.py +1505 -0
  107. teradataml/table_operators/Apply.py +32 -18
  108. teradataml/table_operators/Script.py +3 -1
  109. teradataml/table_operators/TableOperator.py +3 -1
  110. teradataml/table_operators/query_generator.py +3 -0
  111. teradataml/table_operators/table_operator_query_generator.py +3 -1
  112. teradataml/table_operators/table_operator_util.py +37 -38
  113. teradataml/table_operators/templates/dataframe_register.template +69 -0
  114. teradataml/utils/dtypes.py +51 -2
  115. teradataml/utils/internal_buffer.py +18 -0
  116. teradataml/utils/validators.py +99 -8
  117. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/METADATA +321 -5
  118. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/RECORD +121 -94
  119. teradataml/libaed_0_1.dylib +0 -0
  120. teradataml/libaed_0_1.so +0 -0
  121. teradataml/opensource/sklearn/__init__.py +0 -1
  122. teradataml/opensource/sklearn/_class.py +0 -255
  123. teradataml/opensource/sklearn/_sklearn_wrapper.py +0 -1800
  124. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/WHEEL +0 -0
  125. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/top_level.txt +0 -0
  126. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/zip-safe +0 -0
Binary file
teradataml/README.md CHANGED
@@ -16,6 +16,306 @@ Copyright 2024, Teradata. All Rights Reserved.
16
16
  * [License](#license)
17
17
 
18
18
  ## Release Notes:
19
+
20
+ #### teradataml 20.00.00.04
21
+ * ##### New Features/Functionality
22
+ * ###### teradataml OTF Support:
23
+ * This release has enabled the support for accessing OTF data from teradataml.
24
+ * User can now create a teradataml DataFrame on OTF table, allowing user to use teradataml functions.
25
+ * Example usage below:
26
+ * Creation of view on OTF/datalake table is not supported. Hence, user has to set `configure.temp_object_type` to `VT` using below-mentioned statement.
27
+ ```configure.temp_object_type = "VT"```
28
+ * User needs to provide additional information about datalake while creating the DataFrame. There are two approaches to provide datalake information
29
+ * Approach 1: Using `in_schema()`
30
+ ```
31
+ >>> from teradataml.dataframe.dataframe import in_schema
32
+ # Create an in_schema object to provide additional information about datalake.
33
+ >>> in_schema_tbl = in_schema(schema_name="datalake_db",
34
+ ... table_name="datalake_table_name",
35
+ ... datalake_name="datalake")
36
+ >>> otf_df = DataFrame(in_schema_tbl)
37
+ ```
38
+ * Approach 2: Using `DataFrame.from_table()`
39
+ ```
40
+ >>> otf_df = DataFrame.from_table(table_name = "datalake_table_name",
41
+ ... schema_name="datalake_db",
42
+ ... datalake_name="datalake")
43
+ ```
44
+ * Once this DataFrame is created, users can use any DataFrame method or analytics features/functionality from teradataml with it. Visit Limitations and considerations section in _Teradata Python Package User Guide_ to check the supportability.
45
+ * Note: All further operations create volatile tables in local database.
46
+ ```
47
+ >>> new_df = otf_df.assign(new_col=otf_df.existing_col*2)
48
+ ```
49
+ * ###### teradataml: DataFrame
50
+ * Introduced a new feature 'Exploratory Data Analysis UI' (EDA-UI), which enhances
51
+ the user experience of teradataml with Jupyter notebook. EDA-UI is displayed by default
52
+ when a teradataml DataFrame is printed in the Jupyter notebook.
53
+ * User can control the EDA-UI using a new configuration option `display.enable_ui`.
54
+ It can be disabled by setting `display.enable_ui` to False.
55
+ * New Function
56
+ * `get_output()` is added to get the result of Analytic function when executed from EDA UI.
57
+
58
+ * ###### OpensourceML
59
+ * `td_lightgbm` - A teradataml OpenSourceML module
60
+ * `deploy()` - User can now deploy the models created by lightgbm `Booster` and `sklearn` modules. Deploying the model stores the model in Vantage for future use with `td_lightgbm`.
61
+ * `td_lightgbm.deploy()` - Deploy the lightgbm `Booster` or any `scikit-learn` model trained outside Vantage.
62
+ * `td_lightgbm.train().deploy()` - Deploys the lightgbm `Booster` object trained within Vantage.
63
+ * `td_lightgbm.<sklearn_class>().deploy()` - Deploys lightgbm's sklearn class object created/trained within Vantage.
64
+ * `load()` - User can load the deployed models back in the current session. This allows user to use the lightgbm functions with the `td_lightgbm` module.
65
+ * `td_lightgbm.load()` - Load the deployed model in the current session.
66
+
67
+ * ###### FeatureStore
68
+ * New function `FeatureStore.delete()` is added to drop the Feature Store and corresponding repo from Vantage.
69
+
70
+ * ###### Database Utility
71
+ * `db_python_version_diff()` - Identifies the Python interpreter major version difference between the interpreter installed on Vantage vs interpreter on the local user environment.
72
+ * `db_python_package_version_diff()` - Identifies the Python package version difference between the packages installed on Vantage vs the local user environment.
73
+
74
+ * ###### BYOM Function
75
+ * `ONNXEmbeddings()` - Calculate embeddings values in Vantage using an embeddings model that has been created outside Vantage and stored in ONNX format.
76
+
77
+ * ###### teradataml Options
78
+ * Configuration Options
79
+ * `configure.temp_object_type` - Allows user to choose between creating volatile tables or views for teradataml internal use. By default, teradataml internally creates the views for some of the operations. Now, with new configuration option, user can opt to create Volatile tables instead of views. This provides greater flexibility for users who lack the necessary permissions to create view or need to create views on tables without WITH GRANT permissions.
80
+ * Display Options
81
+ * `display.enable_ui` - Specifies whether to display exploratory data analysis UI when DataFrame is printed. By default, this option is enabled (True), allowing exploratory data analysis UI to be displayed. When set to False, exploratory data analysis UI is hidden.
82
+
83
+ * ##### Updates
84
+ * ###### teradataml: DataFrame function
85
+ * `describe()`
86
+ * New argument added: `pivot`.
87
+ * When argument `pivot` is set to False, non-numeric columns are no longer supported for generating statistics.
88
+ Use `CategoricalSummary` and `ColumnSummary`.
89
+ * `fillna()` - Accepts new argument `partition_column` to partition the data and impute null values accordingly.
90
+ * Optimised performance for `DataFrame.plot()`.
91
+ * `DataFrame.plot()` will not regenerate the image when run more than once with same arguments.
92
+ * `DataFrame.from_table()`: New argument `datalake_name` added to accept datalake name while creating DataFrame on datalake table.
93
+
94
+ * ###### teradataml: DataFrame Utilities
95
+ * `in_schema()`: New argument `datalake_name` added to accept datalake name.
96
+
97
+ * ###### Table Operator
98
+ * `Apply()` no longer looks at the authentication token by default. The authentication token is now required only if the user wants to update the backend Open Analytics Framework service.
99
+
100
+ * ###### Hyper Parameter Tuner
101
+ * `GridSearch()` and `RandomSearch()` now display a message referring to the `get_error_log()` API when model training fails in HPT.
102
+
103
+ * ###### teradataml Options
104
+ * Configuration Options
105
+ * `configure.indb_install_location`
106
+ Determines the installation location of the In-DB Python package based on the installed RPM version.
107
+
108
+ * ###### teradataml Context Creation
109
+ * `create_context()` - Enables user to create connection using either parameters set in environment or config file, in addition to previous method. Newly added options help users to hide the sensitive data from the script.
110
+
111
+ * ###### Open Analytics Framework
112
+ * Enhanced the `create_env()` to display a message when an invalid base_env is passed, informing users that the default base_env is being used.
113
+
114
+ * ###### OpensourceML
115
+ * Raises a TeradataMlException, if the Python interpreter major version is different between the Vantage Python environment and the local user environment.
116
+ * Displays a warning, if specific Python package versions are different between the Vantage Python environment and the local user environment.
117
+
118
+ * ###### Database Utility
119
+ * `db_list_tables()`: New argument `datalake_name` added to accept datalake name to list tables from.
120
+ * `db_drop_table()`:
121
+ * New argument `datalake_name` added to accept datalake name to drop tables from.
122
+ * New argument `purge` added to specify whether to use `PURGE ALL` or `NO PURGE` clause while dropping table.
123
+
124
+ * ##### Bug Fixes
125
+ * `td_lightgbm` OpensourceML module: In the multi-model case, the `td_lightgbm.Dataset().add_features_from()` function should add the features of one partition in the first Dataset to the features of the same partition in the second Dataset. Previously this was not the case and the function failed. This is now fixed.
126
+ * Fixed a minor bug in the `Shap()` and converted argument `training_method` to required argument.
127
+ * Fixed PCA-related warnings in `AutoML`.
128
+ * `AutoML` no longer fails when data with all categorical columns are provided.
129
+ * Fixed `AutoML` issue with upsampling method.
130
+ * Excluded the identifier column from outlier processing in `AutoML`.
131
+ * `DataFrame.set_index()` no longer modifies the original DataFrame's index when argument `append` is used.
132
+ * `concat()` function now supports DataFrames whose column names start with a digit, contain special characters, or contain reserved keywords.
133
+ * `create_env()` proceeds to install other files even if current file installation fails.
134
+ * Corrected the error message being raised in `create_env()` when authentication is not set.
135
+ * Added missing argument `charset` for Vantage Analytic Library functions.
136
+ * New argument `seed` is added to `AutoML`, `AutoRegressor` and `AutoClassifier` to ensure consistency on result.
137
+ * Analytic functions now work even if the column names of the underlying tables contain non-ASCII characters.
138
+
139
+ #### teradataml 20.00.00.03
140
+
141
+ * teradataml no longer supports setting the `auth_token` using `set_config_params()`. Users should use `set_auth_token()` to set the token.
142
+
143
+ * ##### New Features/Functionality
144
+ * ###### teradataml: DataFrame
145
+ * New Function
146
+ * `alias()` - Creates a DataFrame with alias name.
147
+ * New Properties
148
+ * `db_object_name` - Get the underlying database object name, on which DataFrame is created.
149
+
150
+ * ###### teradataml: GeoDataFrame
151
+ * New Function
152
+ * `alias()` - Creates a GeoDataFrame with alias name.
153
+
154
+ * ###### teradataml: DataFrameColumn a.k.a. ColumnExpression
155
+ * _Arithmetic Functions_
156
+ * `DataFrameColumn.isnan()` - Function evaluates expression to determine if the floating-point
157
+ argument is a NaN (Not-a-Number) value.
158
+ * `DataFrameColumn.isinf()` - Function evaluates expression to determine if the floating-point
159
+ argument is an infinite number.
160
+ * `DataFrameColumn.isfinite()` - Function evaluates expression to determine if it is a finite
161
+ floating value.
162
+
163
+ * ###### FeatureStore - handles feature management within the Vantage environment
164
+ * FeatureStore Components
165
+ * Feature - Represents a feature which is used in ML Modeling.
166
+ * Entity - Represents the columns which uniquely identify the data used in ML Modeling.
167
+ * DataSource - Represents the source of Data.
168
+ * FeatureGroup - Collection of Feature, Entity and DataSource.
169
+ * Methods
170
+ * `apply()` - Adds Feature, Entity, DataSource to a FeatureGroup.
171
+ * `from_DataFrame()` - Creates a FeatureGroup from teradataml DataFrame.
172
+ * `from_query()` - Creates a FeatureGroup using a SQL query.
173
+ * `remove()` - Removes Feature, Entity, or DataSource from a FeatureGroup.
174
+ * `reset_labels()` - Removes the labels assigned to the FeatureGroup, that are set using `set_labels()`.
175
+ * `set_labels()` - Sets the Features as labels for a FeatureGroup.
176
+ * Properties
177
+ * `features` - Get the features of a FeatureGroup.
178
+ * `labels` - Get the labels of FeatureGroup.
179
+ * FeatureStore
180
+ * Methods
181
+ * `apply()` - Adds Feature, Entity, DataSource, FeatureGroup to FeatureStore.
182
+ * `archive_data_source()` - Archives a specified DataSource from a FeatureStore.
183
+ * `archive_entity()` - Archives a specified Entity from a FeatureStore.
184
+ * `archive_feature()` - Archives a specified Feature from a FeatureStore.
185
+ * `archive_feature_group()` - Archives a specified FeatureGroup from a FeatureStore. Method archives underlying Feature, Entity, DataSource also.
186
+ * `delete_data_source()` - Deletes an archived DataSource.
187
+ * `delete_entity()` - Deletes an archived Entity.
188
+ * `delete_feature()` - Deletes an archived Feature.
189
+ * `delete_feature_group()` - Deletes an archived FeatureGroup.
190
+ * `get_data_source()` - Get the DataSources associated with FeatureStore.
191
+ * `get_dataset()` - Get the teradataml DataFrame based on Features, Entities and DataSource from FeatureGroup.
192
+ * `get_entity()` - Get the Entity associated with FeatureStore.
193
+ * `get_feature()` - Get the Feature associated with FeatureStore.
194
+ * `get_feature_group()` - Get the FeatureGroup associated with FeatureStore.
195
+ * `list_data_sources()` - List DataSources.
196
+ * `list_entities()` - List Entities.
197
+ * `list_feature_groups()` - List FeatureGroups.
198
+ * `list_features()` - List Features.
199
+ * `list_repos()` - List available repos which are configured for FeatureStore.
200
+ * `repair()` - Repairs the underlying FeatureStore schema on database.
201
+ * `set_features_active()` - Marks the Features as active.
202
+ * `set_features_inactive()` - Marks the Features as inactive.
203
+ * `setup()` - Setup the FeatureStore for a repo.
204
+ * Property
205
+ * `repo` - Property for FeatureStore repo.
206
+ * `grant` - Property to Grant access on FeatureStore to user.
207
+ * `revoke` - Property to Revoke access on FeatureStore from user.
208
+
209
+ * ###### teradataml: Table Operator Functions
210
+ * `Image2Matrix()` - Converts an image into a matrix.
211
+
212
+ * ###### teradataml: SQLE Engine Analytic Functions
213
+ * New Analytics Database Analytic Functions:
214
+ * `CFilter()`
215
+ * `NaiveBayes()`
216
+ * `TDNaiveBayesPredict()`
217
+ * `Shap()`
218
+ * `SMOTE()`
219
+
220
+ * ###### teradataml: Unbounded Array Framework (UAF) Functions
221
+ * New Unbounded Array Framework(UAF) Functions:
222
+ * `CopyArt()`
223
+
224
+ * ###### General functions
225
+ * Vantage File Management Functions
226
+ * `list_files()` - List the installed files in Database.
227
+
228
+ * ###### OpensourceML: LightGBM
229
+ * teradataml adds support for lightGBM package through `OpensourceML` (`OpenML`) feature.
230
+ The following functionality is added in the current release:
231
+ * `td_lightgbm` - Interface object to run lightgbm functions and classes through Teradata Vantage.
232
+ Example usage below:
233
+ ```
234
+ from teradataml import td_lightgbm, DataFrame
235
+
236
+ df_train = DataFrame("multi_model_classification")
237
+
238
+ feature_columns = ["col1", "col2", "col3", "col4"]
239
+ label_columns = ["label"]
240
+ part_columns = ["partition_column_1", "partition_column_2"]
241
+
242
+ df_x = df_train.select(feature_columns)
243
+ df_y = df_train.select(label_columns)
244
+
245
+ # Dataset creation.
246
+ # Single model case.
247
+ obj_s = td_lightgbm.Dataset(df_x, df_y, silent=True, free_raw_data=False)
248
+
249
+ # Multi model case.
250
+ obj_m = td_lightgbm.Dataset(df_x, df_y, free_raw_data=False, partition_columns=part_columns)
251
+ obj_m_v = td_lightgbm.Dataset(df_x, df_y, free_raw_data=False, partition_columns=part_columns)
252
+
253
+ ## Model training.
254
+ # Single model case.
255
+ opt = td_lightgbm.train(params={}, train_set = obj_s, num_boost_round=30)
256
+
257
+ opt.predict(data=df_x, num_iteration=20, pred_contrib=True)
258
+
259
+ # Multi model case.
260
+ opt = td_lightgbm.train(params={}, train_set = obj_m, num_boost_round=30,
261
+ callbacks=[td_lightgbm.record_evaluation(rec)],
262
+ valid_sets=[obj_m_v, obj_m_v])
263
+
264
+ # Passing `label` argument to get it returned in output DataFrame.
265
+ opt.predict(data=df_x, label=df_y, num_iteration=20)
266
+
267
+ ```
268
+ * Added support for accessing scikit-learn APIs using exposed interface object `td_lightgbm`.
269
+
270
+ Refer to the Teradata Python Package User Guide for more details on this feature, its arguments, usage, examples and supportability in Vantage.
271
+
272
+ * ###### teradataml: Functions
273
+ * `register()` - Registers a user defined function (UDF).
274
+ * `call_udf()` - Calls a registered user defined function (UDF) and returns ColumnExpression.
275
+ * `list_udfs()` - List all the UDFs registered using 'register()' function.
276
+ * `deregister()` - Deregisters a user defined function (UDF).
277
+
278
+ * ###### teradataml: Options
279
+ * Configuration Options
280
+ * `table_operator` - Specifies the name of table operator.
281
+
282
+ * ##### Updates
283
+ * ###### General functions
284
+ * `set_auth_token()` - Added `base_url` parameter which accepts the CCP url.
285
+ 'ues_url' will be deprecated in future and users
286
+ will need to specify 'base_url' instead.
287
+
288
+ * ###### teradataml: DataFrame function
289
+ * `join()`
290
+ * Now supports compound ColumnExpression having more than one binary operator in `on` argument.
291
+ * Now supports ColumnExpression containing FunctionExpression(s) in `on` argument.
292
+ * self-join now expects aliased DataFrame in `other` argument.
293
+
294
+ * ###### teradataml: GeoDataFrame function
295
+ * `join()`
296
+ * Now supports compound ColumnExpression having more than one binary operator in `on` argument.
297
+ * Now supports ColumnExpression containing FunctionExpression(s) in `on` argument.
298
+ * self-join now expects aliased DataFrame in `other` argument.
299
+
300
+ * ###### teradataml: Unbounded Array Framework (UAF) Functions
301
+ * `SAX()` - Default value added for `window_size` and `output_frequency`.
302
+ * `DickeyFuller()`
303
+ * Supports TDAnalyticResult as input.
304
+ * Default value added for `max_lags`.
305
+ * Removed parameter `drift_trend_formula`.
306
+ * Updated permitted values for `algorithm`.
307
+
308
+ * ##### teradataml: AutoML
309
+ * `AutoML`, `AutoRegressor` and `AutoClassifier`
310
+ * Now supports DECIMAL datatype as input.
311
+
312
+ * ##### teradataml: SQLE Engine Analytic Functions
313
+ * `TextParser()`
314
+ * Argument name `covert_to_lowercase` changed to `convert_to_lowercase`.
315
+
316
+ * ##### Bug Fixes
317
+ * `db_list_tables()` now returns correct results when '%' is used.
318
+
19
319
  #### teradataml 20.00.00.02
20
320
 
21
321
  * teradataml will no longer be supported with SQLAlchemy < 2.0.
@@ -83,6 +383,10 @@ Copyright 2024, Teradata. All Rights Reserved.
83
383
  * Following arguments will be deprecated in the future:
84
384
  * `ues_url`
85
385
  * `auth_token`
386
+
387
+ * #### teradataml DataFrame
388
+ * `to_pandas()` - Function returns the pandas dataframe with Decimal columns types as float instead of object.
389
+ If the user wants the datatype to be object, set argument `coerce_float` to False.
86
390
 
87
391
  * ###### Database Utility
88
392
  * `list_td_reserved_keywords()` - Accepts a list of strings as argument.
@@ -102,7 +406,7 @@ Copyright 2024, Teradata. All Rights Reserved.
102
406
  * ##### Bug Fixes
103
407
  * KNN `predict()` function can now predict on test data which does not contain target column.
104
408
  * Metrics functions are supported on the Lake system.
105
- * The following OpensourceML functions from different sklearn modules are fixed.
409
+ * The following OpensourceML functions from different sklearn modules in single model case are fixed.
106
410
  * `sklearn.ensemble`:
107
411
  * ExtraTreesClassifier - `apply()`
108
412
  * ExtraTreesRegressor - `apply()`
@@ -115,12 +419,21 @@ Copyright 2024, Teradata. All Rights Reserved.
115
419
  * Nystroem - `transform()`, `fit_transform()`
116
420
  * PolynomialCountSketch - `transform()`, `fit_transform()`
117
421
  * RBFSampler - `transform()`, `fit_transform()`
118
- * `sklearn.neighbours`:
422
+ * `sklearn.neighbors`:
119
423
  * KNeighborsTransformer - `transform()`, `fit_transform()`
120
424
  * RadiusNeighborsTransformer - `transform()`, `fit_transform()`
121
425
  * `sklearn.preprocessing`:
122
426
  * KernelCenterer - `transform()`
123
427
  * OneHotEncoder - `transform()`, `inverse_transform()`
428
+ * The following OpensourceML functions from different sklearn modules in multi model case are fixed.
429
+ * `sklearn.feature_selection`:
430
+ * SelectFpr - `transform()`, `fit_transform()`, `inverse_transform()`
431
+ * SelectFdr - `transform()`, `fit_transform()`, `inverse_transform()`
432
+ * SelectFromModel - `transform()`, `fit_transform()`, `inverse_transform()`
433
+ * SelectFwe - `transform()`, `fit_transform()`, `inverse_transform()`
434
+ * RFECV - `transform()`, `fit_transform()`, `inverse_transform()`
435
+ * `sklearn.cluster`:
436
+ * Birch - `transform()`, `fit_transform()`
124
437
  * OpensourceML returns teradataml objects for model attributes and functions instead of sklearn
125
438
  objects so that the user can perform further operations like `score()`, `predict()` etc on top
126
439
  of the returned objects.
teradataml/__init__.py CHANGED
@@ -71,3 +71,7 @@ session_queryband.configure_queryband_parameters(app_name="TDML", app_version=__
71
71
 
72
72
  # Import functions.
73
73
  from teradataml.dataframe.functions import *
74
+
75
+ # Import FeatureStore and VectorStore
76
+ from teradataml.store import *
77
+
teradataml/_version.py CHANGED
@@ -8,4 +8,4 @@
8
8
  #
9
9
  # ##################################################################
10
10
 
11
- version = "20.00.00.02"
11
+ version = "20.00.00.04"
@@ -28,8 +28,8 @@ from teradataml.common.messages import Messages, MessageCodes
28
28
  from teradataml.common.wrapper_utils import AnalyticsWrapperUtils
29
29
  from teradataml.common.utils import UtilFuncs
30
30
  from teradataml.context.context import _get_context_temp_databasename
31
- from teradataml.dataframe.dataframe import in_schema
32
- from teradataml.dbutils.dbutils import _create_table, db_drop_table
31
+ from teradataml.dataframe.dataframe import in_schema, DataFrame
32
+ from teradataml.dbutils.dbutils import _create_table, db_drop_table, list_td_reserved_keywords
33
33
  from teradatasqlalchemy.types import *
34
34
  from teradataml.table_operators.table_operator_query_generator import TableOperatorQueryGenerator
35
35
  from teradataml.telemetry_utils.queryband import collect_queryband
@@ -343,6 +343,17 @@ class _AnlyticFunctionExecutor:
343
343
  self._func_output_args.append(temp_table_name)
344
344
  self._function_output_table_map[lang_name] = temp_table_name
345
345
 
346
+ def _get_column_name_from_feature(self, obj):
347
+ # Extract the associated column name from Feature.
348
+ from teradataml.store.feature_store.feature_store import Feature
349
+ if isinstance(obj, Feature):
350
+ return obj.column_name
351
+
352
+ if isinstance(obj, list):
353
+ return [self._get_column_name_from_feature(col) for col in obj]
354
+
355
+ return obj
356
+
346
357
  def _process_other_argument(self, **kwargs):
347
358
  """
348
359
  DESCRIPTION:
@@ -439,6 +450,9 @@ class _AnlyticFunctionExecutor:
439
450
 
440
451
  self._validate_analytic_function_argument(arg_name, arg_value, argument)
441
452
 
453
+ # Extract column names if it is a Feature.
454
+ arg_value = self._get_column_name_from_feature(arg_value)
455
+
442
456
  # Perform the checks which are specific to argument(_AnlyFuncArgument) type.
443
457
  # Check lower bound and upper bound for number type of arguments.
444
458
  if isinstance(arg_value, (int, float)):
@@ -468,12 +482,21 @@ class _AnlyticFunctionExecutor:
468
482
 
469
483
  # Validate column is existed or not in the table.
470
484
  _Validators._validate_dataframe_has_argument_columns(
471
- arg_value, arg_name, dataframe, target_table_argument_name)
485
+ arg_value, arg_name, dataframe, target_table_argument_name, case_insensitive=True)
472
486
 
473
487
  # Append square brackets for column range when function
474
488
  # does not require special case handler.
475
489
  arg_value = self._spl_func_obj._add_square_bracket(arg_value)
476
490
 
491
+ # Check if there are columns with non-ASCII characters.
492
+ if UtilFuncs._is_ascii(arg_value):
493
+ arg_value = UtilFuncs._teradata_quote_arg(arg_value, "\"", False)
494
+ # Handling special case for Teradata reserved keywords or column names with spaces.
495
+ # If argument is a string or list of strings, then add quotes to the string.
496
+ elif arg_name not in ["partition_columns"] and (\
497
+ UtilFuncs._contains_space(arg_value) or list_td_reserved_keywords(arg_value)):
498
+ arg_value = UtilFuncs._teradata_quote_arg(arg_value, "\"", False)
499
+
477
500
  # SequenceInputBy arguments require special processing.
478
501
  if 500 <= argument.get_r_order_number() <= 510:
479
502
  quoted_value = UtilFuncs._teradata_collapse_arglist(arg_value, "")
@@ -535,6 +558,17 @@ class _AnlyticFunctionExecutor:
535
558
  return repr_string
536
559
  self._dyn_cls_data_members["__repr__"] = print_result
537
560
 
561
+ def copy(self, **args):
562
+ """ Function to copy the ART to another table."""
563
+ from teradataml import CopyArt
564
+ params = {
565
+ "data": self.result,
566
+ "database_name": args.get("database_name", None),
567
+ "table_name": args.get("table_name", None),
568
+ "map_name": args.get("map_name", None),
569
+ "persist": args.get("persist", False)}
570
+ return CopyArt(**params)
571
+
538
572
  query = self.sqlmr_query
539
573
  build_time = None if self.__build_time is None else round(self.__build_time, 2)
540
574
 
@@ -544,6 +578,7 @@ class _AnlyticFunctionExecutor:
544
578
  # To list attributes using dict()
545
579
  self._dyn_cls_data_members["__dict__"] = self._dyn_cls_data_members
546
580
  self._dyn_cls_data_members["_mlresults"] = self._mlresults
581
+ self._dyn_cls_data_members["copy"] = copy
547
582
 
548
583
  # Dynamic class creation with In-DB function name.
549
584
  indb_class = type(self.func_name, (object,), self._dyn_cls_data_members)
@@ -685,10 +720,17 @@ class _AnlyticFunctionExecutor:
685
720
  kwargs.update(kwargs.pop("generic_arguments", {}))
686
721
 
687
722
  # Add all arguments to dynamic class as data members.
723
+ global_volatile = False
724
+ if configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE:
725
+ global_volatile = True
688
726
 
689
727
  start_time = time.time()
690
728
  persist = kwargs.get("persist", False)
691
- volatile = kwargs.get("volatile", False)
729
+ # Use global volatile only when persist argument is False. If persist argument
730
+ # is True, then volatile can't be used whether it is global volatile or normal
731
+ # volatile. If it is normal volatile, then it will raise
732
+ # `CANNOT_USE_TOGETHER_WITH` error below.
733
+ volatile = kwargs.get("volatile", global_volatile if not persist else False)
692
734
  display_table_name = kwargs.get("display_table_name", True)
693
735
 
694
736
  # Validate local_order_column argument type and values.
@@ -700,6 +742,14 @@ class _AnlyticFunctionExecutor:
700
742
  raise TeradataMlException(
701
743
  Messages.get_message(MessageCodes.CANNOT_USE_TOGETHER_WITH, "persist", "volatile"),
702
744
  MessageCodes.CANNOT_USE_TOGETHER_WITH)
745
+
746
+ # If function is VectorDistance and largereference_input is set to True,
747
+ # then set target_data_partition_column to PartitionKind.DIMENSION and
748
+ # reference_data_partition_column to PartitionKind.ANY.
749
+ if self.func_name == "VectorDistance" and \
750
+ kwargs.get("largereference_input", False):
751
+ kwargs['target_data_partition_column'] = PartitionKind.DIMENSION
752
+ kwargs['reference_data_partition_column'] = PartitionKind.ANY
703
753
 
704
754
  self._dyn_cls_data_members.update(kwargs)
705
755
 
@@ -721,6 +771,11 @@ class _AnlyticFunctionExecutor:
721
771
  if self.func_name in ['GLM', 'TDGLMPredict'] and \
722
772
  any(key in kwargs for key in ['data_partition_column', 'data_hash_column', 'local_order_data']):
723
773
  skip_output_arg_processing = True
774
+ elif self.func_name in ['CopyArt']:
775
+ # CopyArt function takes care of persisting the result table internally
776
+ # through 'permanent_table' argument.
777
+ persist = False
778
+ volatile = False
724
779
 
725
780
  if not skip_output_arg_processing:
726
781
  self._process_output_argument(**kwargs)
@@ -994,7 +1049,8 @@ class _SQLEFunctionExecutor(_AnlyticFunctionExecutor):
994
1049
  _Validators._validate_dataframe_has_argument_columns(arg_value,
995
1050
  arg,
996
1051
  input_table_arg_value,
997
- input_table_arg
1052
+ input_table_arg,
1053
+ case_insensitive=True
998
1054
  )
999
1055
 
1000
1056
  order_column_arg_value = UtilFuncs._teradata_collapse_arglist(order_column_arg_value, "\"")
@@ -1446,7 +1502,8 @@ class _TableOperatorExecutor(_SQLEFunctionExecutor):
1446
1502
  _Validators._validate_dataframe_has_argument_columns(hash_column_value,
1447
1503
  hash_column_arg,
1448
1504
  input_table_arg_value,
1449
- input_table_arg
1505
+ input_table_arg,
1506
+ case_insensitive=True
1450
1507
  )
1451
1508
 
1452
1509
  # Hash and order by can be used together as long as is_local_order = True.
@@ -2180,6 +2237,31 @@ class _StoredProcedureExecutor(_UAFFunctionExecutor):
2180
2237
  self._func_other_args['database_name'] = UtilFuncs._teradata_quote_arg(schema_name, "\'", False)
2181
2238
  self._func_other_args['table_name'] = UtilFuncs._teradata_quote_arg(table_name, "\'", False)
2182
2239
 
2240
+ # 'CopyArt' function requires 'SRC_DATABASENMAE' and 'SRC_TABLENAME' as input arguments.
2241
+ # Extract the database and table name from the 'data' argument and add them to the
2242
+ # '_func_other_args' dictionary.
2243
+ if self.func_name == "CopyArt":
2244
+ data = kwargs.get('data', None)
2245
+ argument_info = ["data", data, False, (DataFrame), True]
2246
+ # 'data' is a required argument for 'CopyArt' function to get the source table name and database name.
2247
+ _Validators._validate_missing_required_arguments([argument_info])
2248
+ # 'data' should be a DataFrame.
2249
+ _Validators._validate_function_arguments([argument_info])
2250
+
2251
+ # Add the 'SRC_DATABASENMAE' and 'SRC_TABLENAME' to the '_func_other_args' dictionary.
2252
+ self._func_other_args["SRC_DATABASENMAE"] = "'{0}'".format(UtilFuncs._extract_db_name(data._table_name))
2253
+ self._func_other_args["SRC_TABLENAME"] = "'{0}'".format(UtilFuncs._extract_table_name(data._table_name))
2254
+
2255
+ # Setting permanent_table to True if 'persist' is set to True, else False.
2256
+ kwargs['permanent_table'] = 'True' if kwargs.get('persist', False) else 'False'
2257
+
2258
+ # Setting 'map_name' to empty string if not provided.
2259
+ if kwargs.get('map_name', None) is None:
2260
+ kwargs['map_name'] = ""
2261
+
2262
+ # CopyArt does not take 'data' as input argument.
2263
+ kwargs.pop('data')
2264
+
2183
2265
  for argument in self._metadata.arguments:
2184
2266
  sql_name = argument.get_name()
2185
2267
  lang_name = argument.get_lang_name()
@@ -2236,8 +2318,13 @@ class _StoredProcedureExecutor(_UAFFunctionExecutor):
2236
2318
  Internal function to process the function output.
2237
2319
  """
2238
2320
  for lang_name, table_name in self._function_output_table_map.items():
2239
- out_table_name = UtilFuncs._extract_table_name(table_name)
2240
- out_db_name = UtilFuncs._extract_db_name(table_name)
2321
+ # For 'CopyArt' function, the result should be the destination table name and database name provided as input.
2322
+ if self.func_name == "CopyArt":
2323
+ out_table_name = kwargs.get('table_name')
2324
+ out_db_name = kwargs.get('database_name')
2325
+ else:
2326
+ out_table_name = UtilFuncs._extract_table_name(table_name)
2327
+ out_db_name = UtilFuncs._extract_db_name(table_name)
2241
2328
  df = self._awu._create_data_set_object(
2242
2329
  df_input=out_table_name, database_name=out_db_name, source_type="table")
2243
2330
  self._dyn_cls_data_members[lang_name] = df
@@ -4,7 +4,7 @@ from teradataml.analytics.byom.PMMLPredict import PMMLPredict
4
4
  from teradataml.analytics.meta_class import _AnalyticFunction
5
5
  from teradataml.analytics.meta_class import _common_init, _common_dir
6
6
 
7
- _byom_functions = ['H2OPredict', 'PMMLPredict', 'ONNXPredict', 'DataikuPredict', 'DataRobotPredict']
7
+ _byom_functions = ['H2OPredict', 'PMMLPredict', 'ONNXPredict', 'DataikuPredict', 'DataRobotPredict', 'ONNXEmbeddings']
8
8
 
9
9
  for func in _byom_functions:
10
10
  globals()[func] = type("{}".format(func), (_AnalyticFunction,),
@@ -1179,10 +1179,19 @@ class _AnlyFuncMetadata:
1179
1179
  # from teradataml.data.docs.<function_type>.<doc_dir_with_version_info>.<func_name>
1180
1180
  # import <func_name>
1181
1181
  func_module = __import__(("teradataml.data.docs.{}.{}.{}".
1182
- format(function_type, doc_dir, self.func_name)),
1183
- fromlist=[self.func_name])
1184
- return getattr(func_module, self.func_name).__doc__
1182
+ format(function_type, doc_dir, self.func_name)),
1183
+ fromlist=[self.func_name])
1184
+ return getattr(func_module, self.func_name).__doc__
1185
1185
  except:
1186
+ # For db_version 20.00, if function type is sqle, then check for docs_17_20 directory.
1187
+ if version_dir == '20.00' and function_type == 'sqle':
1188
+ try:
1189
+ func_module = __import__(("teradataml.data.docs.{}.{}.{}".
1190
+ format(function_type, "docs_17_20", self.func_name)),
1191
+ fromlist=[self.func_name])
1192
+ return getattr(func_module, self.func_name).__doc__
1193
+ except:
1194
+ pass
1186
1195
  return ("Refer to Teradata Package for Python Function Reference guide for "
1187
1196
  "Documentation. Reference guide can be found at: https://docs.teradata.com ."
1188
1197
  "Refer to the section with Database version: {}".format(self.__database_version))
@@ -54,7 +54,6 @@ def _get_json_data_from_tdml_repo():
54
54
  # both versions are matched, then the json store has data available so no need
55
55
  # to parse again.
56
56
  if configure.database_version != _JsonStore.version:
57
-
58
57
  # Json store version is different from database version. So, json's should
59
58
  # be parsed again. Before parsing the json, first clean the json store.
60
59
  _JsonStore.clean()
@@ -171,9 +170,15 @@ def __get_json_files_directory():
171
170
  if func_info.value["lowest_version"]:
172
171
  # Check if current function type is allowed on connected Vantage version or not.
173
172
  if func_info.value["func_type"] in func_type_json_version.keys():
173
+ # If function type is SQLE and db_version is 20.00, then add 17.20 JSON directory.
174
+ if func_type_json_version[func_info.value["func_type"]] == '20.00' and \
175
+ func_info.value["func_type"] == 'sqle':
176
+ yield [UtilFuncs._get_data_directory(dir_name="jsons", func_type=func_info,
177
+ version='17.20'),
178
+ func_info.name]
174
179
  yield [UtilFuncs._get_data_directory(dir_name="jsons", func_type=func_info,
175
180
  version=func_type_json_version[func_info.value["func_type"]]),
176
- func_info.name]
181
+ func_info.name]
177
182
  else:
178
183
  yield [UtilFuncs._get_data_directory(dir_name="jsons", func_type=func_info), func_info.name]
179
184
 
@@ -71,6 +71,7 @@ _sqle_functions = ['ANOVA',
71
71
  'Silhouette',
72
72
  'SimpleImputeFit',
73
73
  'SimpleImputeTransform',
74
+ 'SMOTE',
74
75
  'StrApply',
75
76
  'StringSimilarity',
76
77
  'SVM',
@@ -94,7 +95,10 @@ _sqle_functions = ['ANOVA',
94
95
  'WordEmbeddings',
95
96
  'XGBoost',
96
97
  'XGBoostPredict',
97
- 'ZTest'
98
+ 'ZTest',
99
+ 'HNSW',
100
+ 'HNSWPredict',
101
+ 'HNSWSummary',
98
102
  ]
99
103
 
100
104
  for func in _sqle_functions:
@@ -1,7 +1,7 @@
1
1
  from teradataml.analytics.meta_class import _AnalyticFunction
2
2
  from teradataml.analytics.meta_class import _common_init, _common_dir
3
3
 
4
- _nos_functions = ['ReadNOS', 'WriteNOS']
4
+ _nos_functions = ['ReadNOS', 'WriteNOS', 'Image2Matrix']
5
5
 
6
6
  for func in _nos_functions:
7
7
  globals()[func] = type("{}".format(func), (_AnalyticFunction,),