teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic. Click here for more details.

Files changed (126) hide show
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +315 -2
  3. teradataml/__init__.py +4 -0
  4. teradataml/_version.py +1 -1
  5. teradataml/analytics/analytic_function_executor.py +95 -8
  6. teradataml/analytics/byom/__init__.py +1 -1
  7. teradataml/analytics/json_parser/metadata.py +12 -3
  8. teradataml/analytics/json_parser/utils.py +7 -2
  9. teradataml/analytics/sqle/__init__.py +5 -1
  10. teradataml/analytics/table_operator/__init__.py +1 -1
  11. teradataml/analytics/uaf/__init__.py +1 -1
  12. teradataml/analytics/utils.py +4 -0
  13. teradataml/analytics/valib.py +18 -4
  14. teradataml/automl/__init__.py +51 -6
  15. teradataml/automl/data_preparation.py +59 -35
  16. teradataml/automl/data_transformation.py +58 -33
  17. teradataml/automl/feature_engineering.py +27 -12
  18. teradataml/automl/model_training.py +73 -46
  19. teradataml/common/constants.py +88 -29
  20. teradataml/common/garbagecollector.py +2 -1
  21. teradataml/common/messagecodes.py +19 -3
  22. teradataml/common/messages.py +6 -1
  23. teradataml/common/sqlbundle.py +64 -12
  24. teradataml/common/utils.py +246 -47
  25. teradataml/common/warnings.py +11 -0
  26. teradataml/context/context.py +161 -27
  27. teradataml/data/amazon_reviews_25.csv +26 -0
  28. teradataml/data/byom_example.json +11 -0
  29. teradataml/data/dataframe_example.json +18 -2
  30. teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
  31. teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
  32. teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
  33. teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
  34. teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
  35. teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
  36. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
  37. teradataml/data/docs/sqle/docs_17_20/Shap.py +34 -6
  38. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
  39. teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
  40. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  41. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  42. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
  43. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
  44. teradataml/data/hnsw_alter_data.csv +5 -0
  45. teradataml/data/hnsw_data.csv +10 -0
  46. teradataml/data/jsons/byom/h2opredict.json +1 -1
  47. teradataml/data/jsons/byom/onnxembeddings.json +266 -0
  48. teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
  49. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
  50. teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
  51. teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
  52. teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
  53. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  54. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  55. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  56. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  57. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  58. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
  59. teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
  60. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
  61. teradataml/data/medical_readings.csv +101 -0
  62. teradataml/data/patient_profile.csv +101 -0
  63. teradataml/data/scripts/lightgbm/dataset.template +157 -0
  64. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
  65. teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
  66. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
  67. teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
  68. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
  69. teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
  70. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
  71. teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
  72. teradataml/data/target_udt_data.csv +8 -0
  73. teradataml/data/templates/open_source_ml.json +3 -2
  74. teradataml/data/teradataml_example.json +8 -0
  75. teradataml/data/vectordistance_example.json +4 -0
  76. teradataml/dataframe/copy_to.py +8 -3
  77. teradataml/dataframe/data_transfer.py +11 -1
  78. teradataml/dataframe/dataframe.py +1049 -285
  79. teradataml/dataframe/dataframe_utils.py +152 -20
  80. teradataml/dataframe/functions.py +578 -35
  81. teradataml/dataframe/setop.py +11 -6
  82. teradataml/dataframe/sql.py +185 -16
  83. teradataml/dbutils/dbutils.py +1049 -115
  84. teradataml/dbutils/filemgr.py +48 -1
  85. teradataml/hyperparameter_tuner/optimizer.py +12 -1
  86. teradataml/lib/aed_0_1.dll +0 -0
  87. teradataml/opensource/__init__.py +1 -1
  88. teradataml/opensource/_base.py +1466 -0
  89. teradataml/opensource/_class.py +464 -0
  90. teradataml/opensource/{sklearn/constants.py → _constants.py} +21 -14
  91. teradataml/opensource/_lightgbm.py +949 -0
  92. teradataml/opensource/_sklearn.py +1008 -0
  93. teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +5 -6
  94. teradataml/options/__init__.py +54 -38
  95. teradataml/options/configure.py +131 -27
  96. teradataml/options/display.py +13 -2
  97. teradataml/plot/axis.py +47 -8
  98. teradataml/plot/figure.py +33 -0
  99. teradataml/plot/plot.py +63 -13
  100. teradataml/scriptmgmt/UserEnv.py +5 -5
  101. teradataml/scriptmgmt/lls_utils.py +130 -40
  102. teradataml/store/__init__.py +12 -0
  103. teradataml/store/feature_store/__init__.py +0 -0
  104. teradataml/store/feature_store/constants.py +291 -0
  105. teradataml/store/feature_store/feature_store.py +2318 -0
  106. teradataml/store/feature_store/models.py +1505 -0
  107. teradataml/table_operators/Apply.py +32 -18
  108. teradataml/table_operators/Script.py +3 -1
  109. teradataml/table_operators/TableOperator.py +3 -1
  110. teradataml/table_operators/query_generator.py +3 -0
  111. teradataml/table_operators/table_operator_query_generator.py +3 -1
  112. teradataml/table_operators/table_operator_util.py +37 -38
  113. teradataml/table_operators/templates/dataframe_register.template +69 -0
  114. teradataml/utils/dtypes.py +51 -2
  115. teradataml/utils/internal_buffer.py +18 -0
  116. teradataml/utils/validators.py +99 -8
  117. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/METADATA +321 -5
  118. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/RECORD +121 -94
  119. teradataml/libaed_0_1.dylib +0 -0
  120. teradataml/libaed_0_1.so +0 -0
  121. teradataml/opensource/sklearn/__init__.py +0 -1
  122. teradataml/opensource/sklearn/_class.py +0 -255
  123. teradataml/opensource/sklearn/_sklearn_wrapper.py +0 -1800
  124. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/WHEEL +0 -0
  125. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/top_level.txt +0 -0
  126. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/zip-safe +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: teradataml
3
- Version: 20.0.0.2
3
+ Version: 20.0.0.4
4
4
  Summary: Teradata Vantage Python package for Advanced Analytics
5
5
  Home-page: http://www.teradata.com/
6
6
  Author: Teradata Corporation
@@ -17,8 +17,8 @@ Classifier: Topic :: Database :: Front-Ends
17
17
  Classifier: License :: Other/Proprietary License
18
18
  Requires-Python: >=3.8
19
19
  Description-Content-Type: text/markdown
20
- Requires-Dist: teradatasql (>=17.10.0.11)
21
- Requires-Dist: teradatasqlalchemy (>=20.0.0.2)
20
+ Requires-Dist: teradatasql (>=20.0.0.19)
21
+ Requires-Dist: teradatasqlalchemy (>=20.0.0.4)
22
22
  Requires-Dist: pandas (>=0.22)
23
23
  Requires-Dist: psutil
24
24
  Requires-Dist: requests (>=2.25.1)
@@ -28,6 +28,9 @@ Requires-Dist: imbalanced-learn (>=0.8.0)
28
28
  Requires-Dist: pyjwt (>=2.8.0)
29
29
  Requires-Dist: cryptography (>=42.0.5)
30
30
  Requires-Dist: sqlalchemy (>=2.0)
31
+ Requires-Dist: lightgbm (>=3.3.3)
32
+ Requires-Dist: python-dotenv
33
+ Requires-Dist: teradatamlwidgets (>=20.0.0.5)
31
34
 
32
35
  ## Teradata Python package for Advanced Analytics.
33
36
 
@@ -47,6 +50,306 @@ Copyright 2024, Teradata. All Rights Reserved.
47
50
  * [License](#license)
48
51
 
49
52
  ## Release Notes:
53
+
54
+ #### teradataml 20.00.00.04
55
+ * ##### New Features/Functionality
56
+ * ###### teradataml OTF Support:
57
+ * This release has enabled the support for accessing OTF data from teradataml.
58
+ * User can now create a teradataml DataFrame on OTF table, allowing user to use teradataml functions.
59
+ * Example usage below:
60
+ * Creation of view on OTF/datalake table is not supported. Hence, user has to set `configure.temp_object_type` to `VT` using below-mentioned statement.
61
+ ```configure.temp_object_type = "VT"```
62
+ * User needs to provide additional information about datalake while creating the DataFrame. There are two approaches to provide datalake information
63
+ * Approach 1: Using `in_schema()`
64
+ ```
65
+ >>> from teradataml.dataframe.dataframe import in_schema
66
+ # Create an in_schema object to provide additional information about datalake.
67
+ >>> in_schema_tbl = in_schema(schema_name="datalake_db",
68
+ ... table_name="datalake_table_name",
69
+ ... datalake_name="datalake")
70
+ >>> otf_df = DataFrame(in_schema_tbl)
71
+ ```
72
+ * Approach 2: Using `DataFrame.from_table()`
73
+ ```
74
+ >>> otf_df = DataFrame.from_table(table_name = "datalake_table_name",
75
+ ... schema_name="datalake_db",
76
+ ... datalake_name="datalake")
77
+ ```
78
+ * Once this DataFrame is created, users can use any DataFrame method or analytics features/functionality from teradataml with it. Visit Limitations and considerations section in _Teradata Python Package User Guide_ to check the supportability.
79
+ * Note: All further operations create volatile tables in local database.
80
+ ```
81
+ >>> new_df = otf_df.assign(new_col=otf_df.existing_col*2)
82
+ ```
83
+ * ###### teradataml: DataFrame
84
+ * Introduced a new feature 'Exploratory Data Analysis UI' (EDA-UI), which enhances
85
+ the user experience of teradataml with Jupyter notebook. EDA-UI is displayed by default
86
+ when a teradataml DataFrame is printed in the Jupyter notebook.
87
+ * User can control the EDA-UI using a new configuration option `display.enable_ui`.
88
+ It can be disabled by setting `display.enable_ui` to False.
89
+ * New Function
90
+ * `get_output()` is added to get the result of Analytic function when executed from EDA UI.
91
+
92
+ * ###### OpensourceML
93
+ * `td_lightgbm` - A teradataml OpenSourceML module
94
+ * `deploy()` - User can now deploy the models created by lightgbm `Booster` and `sklearn` modules. Deploying the model stores the model in Vantage for future use with `td_lightgbm`.
95
+ * `td_lightgbm.deploy()` - Deploy the lightgbm `Booster` or any `scikit-learn` model trained outside Vantage.
96
+ * `td_lightgbm.train().deploy()` - Deploys the lightgbm `Booster` object trained within Vantage.
97
+ * `td_lightgbm.<sklearn_class>().deploy()` - Deploys lightgbm's sklearn class object created/trained within Vantage.
98
+ * `load()` - User can load the deployed models back in the current session. This allows user to use the lightgbm functions with the `td_lightgbm` module.
99
+ * `td_lightgbm.load()` - Load the deployed model in the current session.
100
+
101
+ * ###### FeatureStore
102
+ * New function `FeatureStore.delete()` is added to drop the Feature Store and corresponding repo from Vantage.
103
+
104
+ * ###### Database Utility
105
+ * `db_python_version_diff()` - Identifies the Python interpreter major version difference between the interpreter installed on Vantage vs interpreter on the local user environment.
106
+ * `db_python_package_version_diff()` - Identifies the Python package version difference between the packages installed on Vantage vs the local user environment.
107
+
108
+ * ###### BYOM Function
109
+ * `ONNXEmbeddings()` - Calculate embeddings values in Vantage using an embeddings model that has been created outside Vantage and stored in ONNX format.
110
+
111
+ * ###### teradataml Options
112
+ * Configuration Options
113
+ * `configure.temp_object_type` - Allows user to choose between creating volatile tables or views for teradataml internal use. By default, teradataml internally creates the views for some of the operations. Now, with new configuration option, user can opt to create Volatile tables instead of views. This provides greater flexibility for users who lack the necessary permissions to create view or need to create views on tables without WITH GRANT permissions.
114
+ * Display Options
115
+ * `display.enable_ui` - Specifies whether to display exploratory data analysis UI when DataFrame is printed. By default, this option is enabled (True), allowing exploratory data analysis UI to be displayed. When set to False, exploratory data analysis UI is hidden.
116
+
117
+ * ##### Updates
118
+ * ###### teradataml: DataFrame function
119
+ * `describe()`
120
+ * New argument added: `pivot`.
121
+ * When argument `pivot` is set to False, Non-numeric columns are no longer supported for generating statistics.
122
+ Use `CategoricalSummary` and `ColumnSummary`.
123
+ * `fillna()` - Accepts new argument `partition_column` to partition the data and impute null values accordingly.
124
+ * Optimised performance for `DataFrame.plot()`.
125
+ * `DataFrame.plot()` will not regenerate the image when run more than once with same arguments.
126
+ * `DataFrame.from_table()`: New argument `datalake_name` added to accept datalake name while creating DataFrame on datalake table.
127
+
128
+ * ###### teradataml: DataFrame Utilities
129
+ * `in_schema()`: New argument `datalake_name` added to accept datalake name.
130
+
131
+ * ###### Table Operator
132
+ * `Apply()` no longer looks at authentication token by default. Authentication token is now required only if the user wants to update backend Open Analytics Framework service.
133
+
134
+ * ###### Hyper Parameter Tuner
135
+ * `GridSearch()` and `RandomSearch()` now display a message to refer to `get_error_log()` API when model training fails in HPT.
136
+
137
+ * ###### teradataml Options
138
+ * Configuration Options
139
+ * `configure.indb_install_location`
140
+ Determines the installation location of the In-DB Python package based on the installed RPM version.
141
+
142
+ * ###### teradataml Context Creation
143
+ * `create_context()` - Enables user to create connection using either parameters set in environment or config file, in addition to previous method. Newly added options help users to hide the sensitive data from the script.
144
+
145
+ * ###### Open Analytics Framework
146
+ * Enhanced the `create_env()` to display a message when an invalid base_env is passed, informing users that the default base_env is being used.
147
+
148
+ * ###### OpensourceML
149
+ * Raises a TeradataMlException, if the Python interpreter major version is different between the Vantage Python environment and the local user environment.
150
+ * Displays a warning, if specific Python package versions are different between the Vantage Python environment and the local user environment.
151
+
152
+ * ###### Database Utility
153
+ * `db_list_tables()`: New argument `datalake_name` added to accept datalake name to list tables from.
154
+ * `db_drop_table()`:
155
+ * New argument `datalake_name` added to accept datalake name to drop tables from.
156
+ * New argument `purge` added to specify whether to use `PURGE ALL` or `NO PURGE` clause while dropping table.
157
+
158
+ * ##### Bug Fixes
159
+ * `td_lightgbm` OpensourceML module: In multi model case, `td_lightgbm.Dataset().add_features_from()` function should add features of one partition in first Dataset to features of the same partition in second Dataset. Previously this was not the case and the function failed. This is now fixed.
160
+ * Fixed a minor bug in the `Shap()` and converted argument `training_method` to required argument.
161
+ * Fixed PCA-related warnings in `AutoML`.
162
+ * `AutoML` no longer fails when data with all categorical columns are provided.
163
+ * Fixed `AutoML` issue with upsampling method.
164
+ * Excluded the identifier column from outlier processing in `AutoML`.
165
+ * `DataFrame.set_index()` no longer modifies the original DataFrame's index when argument `append` is used.
166
+ * `concat()` function now supports DataFrames whose column names start with a digit, contain special characters, or are reserved keywords.
167
+ * `create_env()` proceeds to install other files even if current file installation fails.
168
+ * Corrected the error message being raised in `create_env()` when authentication is not set.
169
+ * Added missing argument `charset` for Vantage Analytic Library functions.
170
+ * New argument `seed` is added to `AutoML`, `AutoRegressor` and `AutoClassifier` to ensure consistency on result.
171
+ * Analytic functions now work even if the column names of the underlying tables contain non-ASCII characters.
172
+
173
+ #### teradataml 20.00.00.03
174
+
175
+ * teradataml no longer supports setting the `auth_token` using `set_config_params()`. Users should use `set_auth_token()` to set the token.
176
+
177
+ * ##### New Features/Functionality
178
+ * ###### teradataml: DataFrame
179
+ * New Function
180
+ * `alias()` - Creates a DataFrame with alias name.
181
+ * New Properties
182
+ * `db_object_name` - Get the underlying database object name, on which DataFrame is created.
183
+
184
+ * ###### teradataml: GeoDataFrame
185
+ * New Function
186
+ * `alias()` - Creates a GeoDataFrame with alias name.
187
+
188
+ * ###### teradataml: DataFrameColumn a.k.a. ColumnExpression
189
+ * _Arithmetic Functions_
190
+ * `DataFrameColumn.isnan()` - Function evaluates expression to determine if the floating-point
191
+ argument is a NaN (Not-a-Number) value.
192
+ * `DataFrameColumn.isinf()` - Function evaluates expression to determine if the floating-point
193
+ argument is an infinite number.
194
+ * `DataFrameColumn.isfinite()` - Function evaluates expression to determine if it is a finite
195
+ floating value.
196
+
197
+ * ###### FeatureStore - handles feature management within the Vantage environment
198
+ * FeatureStore Components
199
+ * Feature - Represents a feature which is used in ML Modeling.
200
+ * Entity - Represents the columns which uniquely identify the data used in ML Modeling.
201
+ * DataSource - Represents the source of Data.
202
+ * FeatureGroup - Collection of Feature, Entity and DataSource.
203
+ * Methods
204
+ * `apply()` - Adds Feature, Entity, DataSource to a FeatureGroup.
205
+ * `from_DataFrame()` - Creates a FeatureGroup from teradataml DataFrame.
206
+ * `from_query()` - Creates a FeatureGroup using a SQL query.
207
+ * `remove()` - Removes Feature, Entity, or DataSource from a FeatureGroup.
208
+ * `reset_labels()` - Removes the labels assigned to the FeatureGroup, that are set using `set_labels()`.
209
+ * `set_labels()` - Sets the Features as labels for a FeatureGroup.
210
+ * Properties
211
+ * `features` - Get the features of a FeatureGroup.
212
+ * `labels` - Get the labels of FeatureGroup.
213
+ * FeatureStore
214
+ * Methods
215
+ * `apply()` - Adds Feature, Entity, DataSource, FeatureGroup to FeatureStore.
216
+ * `archive_data_source()` - Archives a specified DataSource from a FeatureStore.
217
+ * `archive_entity()` - Archives a specified Entity from a FeatureStore.
218
+ * `archive_feature()` - Archives a specified Feature from a FeatureStore.
219
+ * `archive_feature_group()` - Archives a specified FeatureGroup from a FeatureStore. Method archives underlying Feature, Entity, DataSource also.
220
+ * `delete_data_source()` - Deletes an archived DataSource.
221
+ * `delete_entity()` - Deletes an archived Entity.
222
+ * `delete_feature()` - Deletes an archived Feature.
223
+ * `delete_feature_group()` - Deletes an archived FeatureGroup.
224
+ * `get_data_source()` - Get the DataSources associated with FeatureStore.
225
+ * `get_dataset()` - Get the teradataml DataFrame based on Features, Entities and DataSource from FeatureGroup.
226
+ * `get_entity()` - Get the Entity associated with FeatureStore.
227
+ * `get_feature()` - Get the Feature associated with FeatureStore.
228
+ * `get_feature_group()` - Get the FeatureGroup associated with FeatureStore.
229
+ * `list_data_sources()` - List DataSources.
230
+ * `list_entities()` - List Entities.
231
+ * `list_feature_groups()` - List FeatureGroups.
232
+ * `list_features()` - List Features.
233
+ * `list_repos()` - List available repos which are configured for FeatureStore.
234
+ * `repair()` - Repairs the underlying FeatureStore schema on database.
235
+ * `set_features_active()` - Marks the Features as active.
236
+ * `set_features_inactive()` - Marks the Features as inactive.
237
+ * `setup()` - Setup the FeatureStore for a repo.
238
+ * Property
239
+ * `repo` - Property for FeatureStore repo.
240
+ * `grant` - Property to Grant access on FeatureStore to user.
241
+ * `revoke` - Property to Revoke access on FeatureStore from user.
242
+
243
+ * ###### teradataml: Table Operator Functions
244
+ * `Image2Matrix()` - Converts an image into a matrix.
245
+
246
+ * ###### teradataml: SQLE Engine Analytic Functions
247
+ * New Analytics Database Analytic Functions:
248
+ * `CFilter()`
249
+ * `NaiveBayes()`
250
+ * `TDNaiveBayesPredict()`
251
+ * `Shap()`
252
+ * `SMOTE()`
253
+
254
+ * ###### teradataml: Unbounded Array Framework (UAF) Functions
255
+ * New Unbounded Array Framework(UAF) Functions:
256
+ * `CopyArt()`
257
+
258
+ * ###### General functions
259
+ * Vantage File Management Functions
260
+ * `list_files()` - List the installed files in Database.
261
+
262
+ * ###### OpensourceML: LightGBM
263
+ * teradataml adds support for lightGBM package through `OpensourceML` (`OpenML`) feature.
264
+ The following functionality is added in the current release:
265
+ * `td_lightgbm` - Interface object to run lightgbm functions and classes through Teradata Vantage.
266
+ Example usage below:
267
+ ```
268
+ from teradataml import td_lightgbm, DataFrame
269
+
270
+ df_train = DataFrame("multi_model_classification")
271
+
272
+ feature_columns = ["col1", "col2", "col3", "col4"]
273
+ label_columns = ["label"]
274
+ part_columns = ["partition_column_1", "partition_column_2"]
275
+
276
+ df_x = df_train.select(feature_columns)
277
+ df_y = df_train.select(label_columns)
278
+
279
+ # Dataset creation.
280
+ # Single model case.
281
+ obj_s = td_lightgbm.Dataset(df_x, df_y, silent=True, free_raw_data=False)
282
+
283
+ # Multi model case.
284
+ obj_m = td_lightgbm.Dataset(df_x, df_y, free_raw_data=False, partition_columns=part_columns)
285
+ obj_m_v = td_lightgbm.Dataset(df_x, df_y, free_raw_data=False, partition_columns=part_columns)
286
+
287
+ ## Model training.
288
+ # Single model case.
289
+ opt = td_lightgbm.train(params={}, train_set = obj_s, num_boost_round=30)
290
+
291
+ opt.predict(data=df_x, num_iteration=20, pred_contrib=True)
292
+
293
+ # Multi model case.
294
+ opt = td_lightgbm.train(params={}, train_set = obj_m, num_boost_round=30,
295
+ callbacks=[td_lightgbm.record_evaluation(rec)],
296
+ valid_sets=[obj_m_v, obj_m_v])
297
+
298
+ # Passing `label` argument to get it returned in output DataFrame.
299
+ opt.predict(data=df_x, label=df_y, num_iteration=20)
300
+
301
+ ```
302
+ * Added support for accessing scikit-learn APIs using exposed interface object `td_lightgbm`.
303
+
304
+ Refer Teradata Python Package User Guide for more details of this feature, arguments, usage, examples and supportability in Vantage.
305
+
306
+ * ###### teradataml: Functions
307
+ * `register()` - Registers a user defined function (UDF).
308
+ * `call_udf()` - Calls a registered user defined function (UDF) and returns ColumnExpression.
309
+ * `list_udfs()` - List all the UDFs registered using 'register()' function.
310
+ * `deregister()` - Deregisters a user defined function (UDF).
311
+
312
+ * ###### teradataml: Options
313
+ * Configuration Options
314
+ * `table_operator` - Specifies the name of table operator.
315
+
316
+ * ##### Updates
317
+ * ###### General functions
318
+ * `set_auth_token()` - Added `base_url` parameter which accepts the CCP url.
319
+ 'ues_url' will be deprecated in future and users
320
+ will need to specify 'base_url' instead.
321
+
322
+ * ###### teradataml: DataFrame function
323
+ * `join()`
324
+ * Now supports compound ColumnExpression having more than one binary operator in `on` argument.
325
+ * Now supports ColumnExpression containing FunctionExpression(s) in `on` argument.
326
+ * self-join now expects aliased DataFrame in `other` argument.
327
+
328
+ * ###### teradataml: GeoDataFrame function
329
+ * `join()`
330
+ * Now supports compound ColumnExpression having more than one binary operator in `on` argument.
331
+ * Now supports ColumnExpression containing FunctionExpression(s) in `on` argument.
332
+ * self-join now expects aliased DataFrame in `other` argument.
333
+
334
+ * ###### teradataml: Unbounded Array Framework (UAF) Functions
335
+ * `SAX()` - Default value added for `window_size` and `output_frequency`.
336
+ * `DickeyFuller()`
337
+ * Supports TDAnalyticResult as input.
338
+ * Default value added for `max_lags`.
339
+ * Removed parameter `drift_trend_formula`.
340
+ * Updated permitted values for `algorithm`.
341
+
342
+ * ##### teradataml: AutoML
343
+ * `AutoML`, `AutoRegressor` and `AutoClassifier`
344
+ * Now supports DECIMAL datatype as input.
345
+
346
+ * ##### teradataml: SQLE Engine Analytic Functions
347
+ * `TextParser()`
348
+ * Argument name `covert_to_lowercase` changed to `convert_to_lowercase`.
349
+
350
+ * ##### Bug Fixes
351
+ * `db_list_tables()` now returns correct results when '%' is used.
352
+
50
353
  #### teradataml 20.00.00.02
51
354
 
52
355
  * teradataml will no longer be supported with SQLAlchemy < 2.0.
@@ -115,6 +418,10 @@ Copyright 2024, Teradata. All Rights Reserved.
115
418
  * `ues_url`
116
419
  * `auth_token`
117
420
 
421
+ * #### teradata DataFrame
422
+ * `to_pandas()` - Function returns the pandas dataframe with Decimal columns types as float instead of object.
423
+ If the user wants the datatype to be object, set argument `coerce_float` to False.
424
+
118
425
  * ###### Database Utility
119
426
  * `list_td_reserved_keywords()` - Accepts a list of strings as argument.
120
427
 
@@ -133,7 +440,7 @@ Copyright 2024, Teradata. All Rights Reserved.
133
440
  * ##### Bug Fixes
134
441
  * KNN `predict()` function can now predict on test data which does not contain target column.
135
442
  * Metrics functions are supported on the Lake system.
136
- * The following OpensourceML functions from different sklearn modules are fixed.
443
+ * The following OpensourceML functions from different sklearn modules in single model case are fixed.
137
444
  * `sklearn.ensemble`:
138
445
  * ExtraTreesClassifier - `apply()`
139
446
  * ExtraTreesRegressor - `apply()`
@@ -146,12 +453,21 @@ Copyright 2024, Teradata. All Rights Reserved.
146
453
  * Nystroem - `transform()`, `fit_transform()`
147
454
  * PolynomialCountSketch - `transform()`, `fit_transform()`
148
455
  * RBFSampler - `transform()`, `fit_transform()`
149
- * `sklearn.neighbours`:
456
+ * `sklearn.neighbors`:
150
457
  * KNeighborsTransformer - `transform()`, `fit_transform()`
151
458
  * RadiusNeighborsTransformer - `transform()`, `fit_transform()`
152
459
  * `sklearn.preprocessing`:
153
460
  * KernelCenterer - `transform()`
154
461
  * OneHotEncoder - `transform()`, `inverse_transform()`
462
+ * The following OpensourceML functions from different sklearn modules in multi model case are fixed.
463
+ * `sklearn.feature_selection`:
464
+ * SelectFpr - `transform()`, `fit_transform()`, `inverse_transform()`
465
+ * SelectFdr - `transform()`, `fit_transform()`, `inverse_transform()`
466
+ * SelectFromModel - `transform()`, `fit_transform()`, `inverse_transform()`
467
+ * SelectFwe - `transform()`, `fit_transform()`, `inverse_transform()`
468
+ * RFECV - `transform()`, `fit_transform()`, `inverse_transform()`
469
+ * `sklearn.cluster`:
470
+ * Birch - `transform()`, `fit_transform()`
155
471
  * OpensourceML returns teradataml objects for model attributes and functions instead of sklearn
156
472
  objects so that the user can perform further operations like `score()`, `predict()` etc on top
157
473
  of the returned objects.