teradataml 20.0.0.3__py3-none-any.whl → 20.0.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +193 -1
- teradataml/__init__.py +2 -1
- teradataml/_version.py +2 -2
- teradataml/analytics/analytic_function_executor.py +25 -18
- teradataml/analytics/byom/__init__.py +1 -1
- teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
- teradataml/analytics/sqle/__init__.py +20 -2
- teradataml/analytics/utils.py +15 -1
- teradataml/analytics/valib.py +18 -4
- teradataml/automl/__init__.py +341 -112
- teradataml/automl/autodataprep/__init__.py +471 -0
- teradataml/automl/data_preparation.py +84 -42
- teradataml/automl/data_transformation.py +69 -33
- teradataml/automl/feature_engineering.py +76 -9
- teradataml/automl/feature_exploration.py +639 -25
- teradataml/automl/model_training.py +35 -14
- teradataml/clients/auth_client.py +2 -2
- teradataml/common/__init__.py +1 -2
- teradataml/common/constants.py +122 -63
- teradataml/common/messagecodes.py +14 -3
- teradataml/common/messages.py +8 -4
- teradataml/common/sqlbundle.py +40 -10
- teradataml/common/utils.py +366 -74
- teradataml/common/warnings.py +11 -0
- teradataml/context/context.py +348 -86
- teradataml/data/amazon_reviews_25.csv +26 -0
- teradataml/data/apriori_example.json +22 -0
- teradataml/data/byom_example.json +11 -0
- teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
- teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
- teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
- teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
- teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
- teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
- teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
- teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
- teradataml/data/docs/sqle/docs_17_20/Shap.py +28 -6
- teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
- teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
- teradataml/data/hnsw_alter_data.csv +5 -0
- teradataml/data/hnsw_data.csv +10 -0
- teradataml/data/jsons/byom/h2opredict.json +1 -1
- teradataml/data/jsons/byom/onnxembeddings.json +266 -0
- teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
- teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
- teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
- teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
- teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
- teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
- teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
- teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
- teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +2 -2
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +3 -3
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +6 -6
- teradataml/data/ner_dict.csv +8 -0
- teradataml/data/ner_input_eng.csv +7 -0
- teradataml/data/ner_rule.csv +5 -0
- teradataml/data/pos_input.csv +40 -0
- teradataml/data/tdnerextractor_example.json +14 -0
- teradataml/data/teradataml_example.json +21 -0
- teradataml/data/textmorph_example.json +5 -0
- teradataml/data/to_num_data.csv +4 -0
- teradataml/data/tochar_data.csv +5 -0
- teradataml/data/trans_dense.csv +16 -0
- teradataml/data/trans_sparse.csv +55 -0
- teradataml/data/vectordistance_example.json +1 -1
- teradataml/dataframe/copy_to.py +45 -29
- teradataml/dataframe/data_transfer.py +72 -46
- teradataml/dataframe/dataframe.py +642 -166
- teradataml/dataframe/dataframe_utils.py +167 -22
- teradataml/dataframe/functions.py +135 -20
- teradataml/dataframe/setop.py +11 -6
- teradataml/dataframe/sql.py +330 -78
- teradataml/dbutils/dbutils.py +556 -140
- teradataml/dbutils/filemgr.py +14 -10
- teradataml/hyperparameter_tuner/optimizer.py +12 -1
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/{sklearn/_sklearn_wrapper.py → _base.py} +168 -1013
- teradataml/opensource/_class.py +141 -17
- teradataml/opensource/{constants.py → _constants.py} +7 -3
- teradataml/opensource/_lightgbm.py +52 -53
- teradataml/opensource/_sklearn.py +1008 -0
- teradataml/opensource/_wrapper_utils.py +5 -5
- teradataml/options/__init__.py +47 -15
- teradataml/options/configure.py +103 -26
- teradataml/options/display.py +13 -2
- teradataml/plot/axis.py +47 -8
- teradataml/plot/figure.py +33 -0
- teradataml/plot/plot.py +63 -13
- teradataml/scriptmgmt/UserEnv.py +307 -40
- teradataml/scriptmgmt/lls_utils.py +428 -145
- teradataml/store/__init__.py +2 -3
- teradataml/store/feature_store/feature_store.py +102 -7
- teradataml/table_operators/Apply.py +48 -19
- teradataml/table_operators/Script.py +23 -2
- teradataml/table_operators/TableOperator.py +3 -1
- teradataml/table_operators/table_operator_util.py +58 -9
- teradataml/utils/dtypes.py +49 -1
- teradataml/utils/internal_buffer.py +38 -0
- teradataml/utils/validators.py +377 -62
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/METADATA +200 -4
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/RECORD +146 -112
- teradataml/data/SQL_Fundamentals.pdf +0 -0
- teradataml/libaed_0_1.dylib +0 -0
- teradataml/libaed_0_1.so +0 -0
- teradataml/opensource/sklearn/__init__.py +0 -0
- teradataml/store/vector_store/__init__.py +0 -1586
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/zip-safe +0 -0
teradataml/LICENSE-3RD-PARTY.pdf
CHANGED
|
Binary file
|
teradataml/README.md
CHANGED
|
@@ -6,7 +6,7 @@ For community support, please visit the [Teradata Community](https://support.ter
|
|
|
6
6
|
|
|
7
7
|
For Teradata customer support, please visit [Teradata Support](https://support.teradata.com/csm).
|
|
8
8
|
|
|
9
|
-
Copyright 2024, Teradata. All Rights Reserved.
|
|
9
|
+
Copyright 2025, Teradata. All Rights Reserved.
|
|
10
10
|
|
|
11
11
|
### Table of Contents
|
|
12
12
|
* [Release Notes](#release-notes)
|
|
@@ -17,6 +17,198 @@ Copyright 2024, Teradata. All Rights Reserved.
|
|
|
17
17
|
|
|
18
18
|
## Release Notes:
|
|
19
19
|
|
|
20
|
+
#### teradataml 20.00.00.05
|
|
21
|
+
* ##### New Features/Functionality
|
|
22
|
+
* ##### teradataml: AutoML
|
|
23
|
+
* New methods added for `AutoML()`, `AutoRegressor()` and `AutoClassifier()`:
|
|
24
|
+
* `get_persisted_tables()` - List the persisted tables created during AutoML execution.
|
|
25
|
+
* `visualize()` - Generates visualizations to analyze and understand the underlying patterns in the data.
|
|
26
|
+
|
|
27
|
+
* ##### AutoDataPrep - Automated Data Preparation
|
|
28
|
+
AutoDataPrep simplifies the data preparation process by automating the different aspects of
|
|
29
|
+
data cleaning and transformation, enabling seamless exploration, transformation, and optimization of datasets.
|
|
30
|
+
* `AutoDataPrep`
|
|
31
|
+
* Methods of AutoDataPrep
|
|
32
|
+
* `__init__()` - Instantiate an object of AutoDataPrep with given parameters.
|
|
33
|
+
* `fit()` - Perform fit on specified data and target column.
|
|
34
|
+
* `get_data()` - Retrieve the data after AutoDataPrep.
|
|
35
|
+
* `load()` - Load the saved datasets from Teradata Vantage.
|
|
36
|
+
* `deploy()` - Persist the datasets generated by AutoDataPrep in Teradata Vantage.
|
|
37
|
+
* `delete_data()` - Deletes the deployed dataset from the Teradata Vantage.
|
|
38
|
+
* `visualize()` - Generates visualizations to analyze and understand the underlying patterns in the data.
|
|
39
|
+
|
|
40
|
+
* ##### teradataml: SQLE Engine Analytic Functions
|
|
41
|
+
* New Analytics Database Analytic Functions:
|
|
42
|
+
* `Apriori()`
|
|
43
|
+
* `NERExtractor()`
|
|
44
|
+
* `TextMorph()`
|
|
45
|
+
|
|
46
|
+
* ##### teradataml: Functions
|
|
47
|
+
* `td_range()` - Creates a DataFrame with a specified range of numbers.
|
|
48
|
+
|
|
49
|
+
* ##### teradataml DataFrameColumn a.k.a. ColumnExpression
|
|
50
|
+
* `DataFrameColumn.to_number()` - Function converts a string-like representation of a number to NUMBER type.
|
|
51
|
+
|
|
52
|
+
* ##### Updates
|
|
53
|
+
* ###### teradataml: DataFrame function
|
|
54
|
+
* `DataFrame.agg()`: User can request different percentiles while running the agg function.
|
|
55
|
+
* New argument `debug` is added to `DataFrame.map_row()`, `DataFrame.map_partition()`, `DataFrame.apply()` and `udf()`. During the execution of these functions, teradataml internally generates scripts, which are garbage collected implicitly. To debug the failures, this argument allows user to control the garbage collection of the script. When set to False (default), script generated is garbage collected, otherwise script is not garbage collected and displays the path to the script, and user is responsible to remove the script if required.
|
|
56
|
+
* `map_row()`, `map_partition()` and `apply()`
|
|
57
|
+
* Raises a TeradataMlException, if the Python interpreter major version is different between the Vantage Python environment and the local user environment.
|
|
58
|
+
* Displays a warning, if `dill` package version is different between the Vantage Python environment and the local user environment.
|
|
59
|
+
* `DataFrame.describe()`: Argument `include` is no longer supported.
|
|
60
|
+
* `assign()` - Optimized SQL query to enhance the performance for consecutive assign calls.
|
|
61
|
+
|
|
62
|
+
* ###### teradataml: Context Creation
|
|
63
|
+
* `create_context()`
|
|
64
|
+
* Enables user to set the authentication token while creating the connection. This authentication token is required to access services running on Teradata Vantage.
|
|
65
|
+
* New argument `sql_timeout` is added to specify timeout for SQL statement execution triggered from the current session.
|
|
66
|
+
|
|
67
|
+
* ###### teradataml: UAF Functions
|
|
68
|
+
* Integer type value is now accepted as a valid value for function arguments accepting float type.
|
|
69
|
+
|
|
70
|
+
* ###### General functions
|
|
71
|
+
* `set_auth_token()`
|
|
72
|
+
* Added argument `kid` to accept the name of the key used while generating `pem_file`.
|
|
73
|
+
* New keyword argument `auth_mech` accepts the authentication mechanism to be used for generating authentication token.
|
|
74
|
+
* Basic authentication is now supported as well. New keyword argument `password` accepts password for database user in such case.
|
|
75
|
+
* `copy_to_sql()` and `read_csv()` support the VECTOR data type.
|
|
76
|
+
|
|
77
|
+
* ###### Open Analytics Framework (OpenAF) APIs:
|
|
78
|
+
* `create_env()`:
|
|
79
|
+
* Supports creation of conda R environment.
|
|
80
|
+
|
|
81
|
+
* ###### teradataml DataFrameColumn a.k.a. ColumnExpression
|
|
82
|
+
* _String Functions_
|
|
83
|
+
* `DataFrameColumn.substr()` - Arguments `start_pos` and `length` now accept DataFrameColumn as input.
|
|
84
|
+
* `DataFrameColumn.to_char()` - Argument `formatter` now accepts DataFrameColumn as input.
|
|
85
|
+
|
|
86
|
+
* ###### teradataml: SQLE Engine Analytic Functions
|
|
87
|
+
* Updated Analytics Database Analytic Functions:
|
|
88
|
+
* `SMOTE()` is now supported on 17.20.00.00 as well.
|
|
89
|
+
* `TextParser()`
|
|
90
|
+
* New arguments added: `enforce_token_limit`, `delimiter_regex`, `doc_id_column`,
|
|
91
|
+
`list_positions`, `token_frequency`, `output_by_word`
|
|
92
|
+
|
|
93
|
+
#### teradataml 20.00.00.04
|
|
94
|
+
* ##### New Features/Functionality
|
|
95
|
+
* ###### teradataml OTF Support:
|
|
96
|
+
* This release has enabled the support for accessing OTF data from teradataml.
|
|
97
|
+
* User can now create a teradataml DataFrame on OTF table, allowing user to use teradataml functions.
|
|
98
|
+
* Example usage below:
|
|
99
|
+
* Creation of view on OTF/datalake table is not supported. Hence, user has to set `configure.temp_object_type` to `VT` using below-mentioned statement.
|
|
100
|
+
```configure.temp_object_type = "VT"```
|
|
101
|
+
* User needs to provide additional information about datalake while creating the DataFrame. There are two approaches to provide datalake information:
|
|
102
|
+
* Approach 1: Using `in_schema()`
|
|
103
|
+
```
|
|
104
|
+
>>> from teradataml.dataframe.dataframe import in_schema
|
|
105
|
+
# Create an in_schema object to provide additional information about datalake.
|
|
106
|
+
>>> in_schema_tbl = in_schema(schema_name="datalake_db",
|
|
107
|
+
... table_name="datalake_table_name",
|
|
108
|
+
... datalake_name="datalake")
|
|
109
|
+
>>> otf_df = DataFrame(in_schema_tbl)
|
|
110
|
+
```
|
|
111
|
+
* Approach 2: Using `DataFrame.from_table()`
|
|
112
|
+
```
|
|
113
|
+
>>> otf_df = DataFrame.from_table(table_name = "datalake_table_name",
|
|
114
|
+
... schema_name="datalake_db",
|
|
115
|
+
... datalake_name="datalake")
|
|
116
|
+
```
|
|
117
|
+
* Once this DataFrame is created, users can use any DataFrame method or analytics features/functionality from teradataml with it. Visit Limitations and considerations section in _Teradata Python Package User Guide_ to check the supportability.
|
|
118
|
+
* Note: All further operations create volatile tables in local database.
|
|
119
|
+
```
|
|
120
|
+
>>> new_df = otf_df.assign(new_col=otf_df.existing_col*2)
|
|
121
|
+
```
|
|
122
|
+
* ###### teradataml: DataFrame
|
|
123
|
+
* Introduced a new feature 'Exploratory Data Analysis UI' (EDA-UI), which enhances
|
|
124
|
+
the user experience of teradataml with Jupyter notebook. EDA-UI is displayed by default
|
|
125
|
+
when a teradataml DataFrame is printed in the Jupyter notebook.
|
|
126
|
+
* User can control the EDA-UI using a new configuration option `display.enable_ui`.
|
|
127
|
+
It can be disabled by setting `display.enable_ui` to False.
|
|
128
|
+
* New Function
|
|
129
|
+
* `get_output()` is added to get the result of Analytic function when executed from EDA UI.
|
|
130
|
+
|
|
131
|
+
* ###### OpensourceML
|
|
132
|
+
* `td_lightgbm` - A teradataml OpenSourceML module
|
|
133
|
+
* `deploy()` - User can now deploy the models created by lightgbm `Booster` and `sklearn` modules. Deploying the model stores the model in Vantage for future use with `td_lightgbm`.
|
|
134
|
+
* `td_lightgbm.deploy()` - Deploy the lightgbm `Booster` or any `scikit-learn` model trained outside Vantage.
|
|
135
|
+
* `td_lightgbm.train().deploy()` - Deploys the lightgbm `Booster` object trained within Vantage.
|
|
136
|
+
* `td_lightgbm.<sklearn_class>().deploy()` - Deploys lightgbm's sklearn class object created/trained within Vantage.
|
|
137
|
+
* `load()` - User can load the deployed models back in the current session. This allows user to use the lightgbm functions with the `td_lightgbm` module.
|
|
138
|
+
* `td_lightgbm.load()` - Load the deployed model in the current session.
|
|
139
|
+
|
|
140
|
+
* ###### FeatureStore
|
|
141
|
+
* New function `FeatureStore.delete()` is added to drop the Feature Store and corresponding repo from Vantage.
|
|
142
|
+
|
|
143
|
+
* ###### Database Utility
|
|
144
|
+
* `db_python_version_diff()` - Identifies the Python interpreter major version difference between the interpreter installed on Vantage vs interpreter on the local user environment.
|
|
145
|
+
* `db_python_package_version_diff()` - Identifies the Python package version difference between the packages installed on Vantage vs the local user environment.
|
|
146
|
+
|
|
147
|
+
* ###### BYOM Function
|
|
148
|
+
* `ONNXEmbeddings()` - Calculate embeddings values in Vantage using an embeddings model that has been created outside Vantage and stored in ONNX format.
|
|
149
|
+
|
|
150
|
+
* ###### teradataml Options
|
|
151
|
+
* Configuration Options
|
|
152
|
+
* `configure.temp_object_type` - Allows user to choose between creating volatile tables or views for teradataml internal use. By default, teradataml internally creates the views for some of the operations. Now, with new configuration option, user can opt to create Volatile tables instead of views. This provides greater flexibility for users who lack the necessary permissions to create view or need to create views on tables without WITH GRANT permissions.
|
|
153
|
+
* Display Options
|
|
154
|
+
* `display.enable_ui` - Specifies whether to display exploratory data analysis UI when DataFrame is printed. By default, this option is enabled (True), allowing exploratory data analysis UI to be displayed. When set to False, exploratory data analysis UI is hidden.
|
|
155
|
+
|
|
156
|
+
* ##### Updates
|
|
157
|
+
* ###### teradataml: DataFrame function
|
|
158
|
+
* `describe()`
|
|
159
|
+
* New argument added: `pivot`.
|
|
160
|
+
* When argument `pivot` is set to False, non-numeric columns are no longer supported for generating statistics.
|
|
161
|
+
Use `CategoricalSummary` and `ColumnSummary`.
|
|
162
|
+
* `fillna()` - Accepts new argument `partition_column` to partition the data and impute null values accordingly.
|
|
163
|
+
* Optimised performance for `DataFrame.plot()`.
|
|
164
|
+
* `DataFrame.plot()` will not regenerate the image when run more than once with same arguments.
|
|
165
|
+
* `DataFrame.from_table()`: New argument `datalake_name` added to accept datalake name while creating DataFrame on datalake table.
|
|
166
|
+
|
|
167
|
+
* ###### teradataml: DataFrame Utilities
|
|
168
|
+
* `in_schema()`: New argument `datalake_name` added to accept datalake name.
|
|
169
|
+
|
|
170
|
+
* ###### Table Operator
|
|
171
|
+
* `Apply()` no longer looks at authentication token by default. Authentication token is now required only if user wants to consume Open Analytics Framework REST APIs.
|
|
172
|
+
|
|
173
|
+
* ###### Hyper Parameter Tuner
|
|
174
|
+
* `GridSearch()` and `RandomSearch()` now display a message referring to the `get_error_log()` API when model training fails in HPT.
|
|
175
|
+
|
|
176
|
+
* ###### teradataml Options
|
|
177
|
+
* Configuration Options
|
|
178
|
+
* `configure.indb_install_location`
|
|
179
|
+
Determines the installation location of the In-DB Python package based on the installed RPM version.
|
|
180
|
+
|
|
181
|
+
* ###### teradataml Context Creation
|
|
182
|
+
* `create_context()` - Enables user to create connection using either parameters set in environment or config file, in addition to previous method. Newly added options help users to hide the sensitive data from the script.
|
|
183
|
+
|
|
184
|
+
* ###### Open Analytics Framework
|
|
185
|
+
* Enhanced the `create_env()` to display a message when an invalid base_env is passed, informing users that the default base_env is being used.
|
|
186
|
+
|
|
187
|
+
* ###### OpensourceML
|
|
188
|
+
* Raises a TeradataMlException, if the Python interpreter major version is different between the Vantage Python environment and the local user environment.
|
|
189
|
+
* Displays a warning, if specific Python package versions are different between the Vantage Python environment and the local user environment.
|
|
190
|
+
|
|
191
|
+
* ###### Database Utility
|
|
192
|
+
* `db_list_tables()`: New argument `datalake_name` added to accept datalake name to list tables from.
|
|
193
|
+
* `db_drop_table()`:
|
|
194
|
+
* New argument `datalake_name` added to accept datalake name to drop tables from.
|
|
195
|
+
* New argument `purge` added to specify whether to use `PURGE ALL` or `NO PURGE` clause while dropping table.
|
|
196
|
+
|
|
197
|
+
* ##### Bug Fixes
|
|
198
|
+
* `td_lightgbm` OpensourceML module: In the multi-model case, the `td_lightgbm.Dataset().add_features_from()` function should add the features of one partition in the first Dataset to the features of the same partition in the second Dataset. Previously this was not the case and the function failed. This is now fixed.
|
|
199
|
+
* Fixed a minor bug in `Shap()` and converted argument `training_method` to a required argument.
|
|
200
|
+
* Fixed PCA-related warnings in `AutoML`.
|
|
201
|
+
* `AutoML` no longer fails when data with all categorical columns are provided.
|
|
202
|
+
* Fixed `AutoML` issue with upsampling method.
|
|
203
|
+
* Excluded the identifier column from outlier processing in `AutoML`.
|
|
204
|
+
* `DataFrame.set_index()` no longer modifies the original DataFrame's index when argument `append` is used.
|
|
205
|
+
* `concat()` function now supports DataFrames with column names that start with a digit, contain special characters, or contain reserved keywords.
|
|
206
|
+
* `create_env()` proceeds to install other files even if current file installation fails.
|
|
207
|
+
* Corrected the error message being raised in `create_env()` when authentication token is not set.
|
|
208
|
+
* Added missing argument `charset` for Vantage Analytic Library functions.
|
|
209
|
+
* New argument `seed` is added to `AutoML`, `AutoRegressor` and `AutoClassifier` to ensure consistency of results.
|
|
210
|
+
* Analytic functions now work even if name of columns for underlying tables has non-ascii characters.
|
|
211
|
+
|
|
20
212
|
#### teradataml 20.00.00.03
|
|
21
213
|
|
|
22
214
|
* teradataml no longer supports setting the `auth_token` using `set_config_params()`. Users should use `set_auth_token()` to set the token.
|
teradataml/__init__.py
CHANGED
|
@@ -63,6 +63,7 @@ from teradataml.opensource import *
|
|
|
63
63
|
|
|
64
64
|
# Import AutoML
|
|
65
65
|
from teradataml.automl import AutoML, AutoRegressor, AutoClassifier
|
|
66
|
+
from teradataml.automl.autodataprep import AutoDataPrep
|
|
66
67
|
|
|
67
68
|
# Import global variable representing session_queryband.
|
|
68
69
|
from teradataml.telemetry_utils.queryband import session_queryband
|
|
@@ -72,6 +73,6 @@ session_queryband.configure_queryband_parameters(app_name="TDML", app_version=__
|
|
|
72
73
|
# Import functions.
|
|
73
74
|
from teradataml.dataframe.functions import *
|
|
74
75
|
|
|
75
|
-
# Import FeatureStore
|
|
76
|
+
# Import FeatureStore.
|
|
76
77
|
from teradataml.store import *
|
|
77
78
|
|
teradataml/_version.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# ##################################################################
|
|
2
2
|
#
|
|
3
|
-
# Copyright
|
|
3
|
+
# Copyright 2025 Teradata. All rights reserved.
|
|
4
4
|
# TERADATA CONFIDENTIAL AND TRADE SECRET
|
|
5
5
|
#
|
|
6
6
|
# Primary Owner: Pankaj Purandare (PankajVinod.Purandare@teradata.com)
|
|
@@ -8,4 +8,4 @@
|
|
|
8
8
|
#
|
|
9
9
|
# ##################################################################
|
|
10
10
|
|
|
11
|
-
version = "20.00.00.03"
|
|
11
|
+
version = "20.00.00.05"
|
|
@@ -137,6 +137,7 @@ class _AnlyticFunctionExecutor:
|
|
|
137
137
|
else:
|
|
138
138
|
py_types = (py_types, additional_valid_types)
|
|
139
139
|
|
|
140
|
+
|
|
140
141
|
argument_info = [func_arg_name,
|
|
141
142
|
func_arg_value,
|
|
142
143
|
not argument.is_required(),
|
|
@@ -482,17 +483,20 @@ class _AnlyticFunctionExecutor:
|
|
|
482
483
|
|
|
483
484
|
# Validate whether the column exists in the table.
|
|
484
485
|
_Validators._validate_dataframe_has_argument_columns(
|
|
485
|
-
arg_value, arg_name, dataframe, target_table_argument_name)
|
|
486
|
+
arg_value, arg_name, dataframe, target_table_argument_name, case_insensitive=True)
|
|
486
487
|
|
|
487
488
|
# Append square brackets for column range when function
|
|
488
489
|
# does not require special case handler.
|
|
489
490
|
arg_value = self._spl_func_obj._add_square_bracket(arg_value)
|
|
490
491
|
|
|
492
|
+
# Check if there are columns with non-ASCII characters.
|
|
493
|
+
if UtilFuncs._is_ascii(arg_value):
|
|
494
|
+
arg_value = UtilFuncs._teradata_quote_arg(arg_value, "\"", False)
|
|
491
495
|
# Handling special case for Teradata reserved keywords or column names with spaces.
|
|
492
496
|
# If argument is a string or list of strings, then add quotes to the string.
|
|
493
|
-
|
|
497
|
+
elif arg_name not in ["partition_columns"] and (\
|
|
494
498
|
UtilFuncs._contains_space(arg_value) or list_td_reserved_keywords(arg_value)):
|
|
495
|
-
arg_value = UtilFuncs._teradata_quote_arg(arg_value, "\"", False)
|
|
499
|
+
arg_value = UtilFuncs._teradata_quote_arg(arg_value, "\"", False)
|
|
496
500
|
|
|
497
501
|
# SequenceInputBy arguments require special processing.
|
|
498
502
|
if 500 <= argument.get_r_order_number() <= 510:
|
|
@@ -717,10 +721,17 @@ class _AnlyticFunctionExecutor:
|
|
|
717
721
|
kwargs.update(kwargs.pop("generic_arguments", {}))
|
|
718
722
|
|
|
719
723
|
# Add all arguments to dynamic class as data members.
|
|
724
|
+
global_volatile = False
|
|
725
|
+
if configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE:
|
|
726
|
+
global_volatile = True
|
|
720
727
|
|
|
721
728
|
start_time = time.time()
|
|
722
729
|
persist = kwargs.get("persist", False)
|
|
723
|
-
volatile
|
|
730
|
+
# Use global volatile only when persist argument is False. If persist argument
|
|
731
|
+
# is True, then volatile can't be used whether it is global volatile or normal
|
|
732
|
+
# volatile. If it is normal volatile, then it will raise
|
|
733
|
+
# `CANNOT_USE_TOGETHER_WITH` error below.
|
|
734
|
+
volatile = kwargs.get("volatile", global_volatile if not persist else False)
|
|
724
735
|
display_table_name = kwargs.get("display_table_name", True)
|
|
725
736
|
|
|
726
737
|
# Validate local_order_column argument type and values.
|
|
@@ -1039,7 +1050,8 @@ class _SQLEFunctionExecutor(_AnlyticFunctionExecutor):
|
|
|
1039
1050
|
_Validators._validate_dataframe_has_argument_columns(arg_value,
|
|
1040
1051
|
arg,
|
|
1041
1052
|
input_table_arg_value,
|
|
1042
|
-
input_table_arg
|
|
1053
|
+
input_table_arg,
|
|
1054
|
+
case_insensitive=True
|
|
1043
1055
|
)
|
|
1044
1056
|
|
|
1045
1057
|
order_column_arg_value = UtilFuncs._teradata_collapse_arglist(order_column_arg_value, "\"")
|
|
@@ -1491,7 +1503,8 @@ class _TableOperatorExecutor(_SQLEFunctionExecutor):
|
|
|
1491
1503
|
_Validators._validate_dataframe_has_argument_columns(hash_column_value,
|
|
1492
1504
|
hash_column_arg,
|
|
1493
1505
|
input_table_arg_value,
|
|
1494
|
-
input_table_arg
|
|
1506
|
+
input_table_arg,
|
|
1507
|
+
case_insensitive=True
|
|
1495
1508
|
)
|
|
1496
1509
|
|
|
1497
1510
|
# Hash and order by can be used together as long as is_local_order = True.
|
|
@@ -1937,12 +1950,9 @@ class _UAFFunctionExecutor(_SQLEFunctionExecutor):
|
|
|
1937
1950
|
# a list and of the required size.
|
|
1938
1951
|
if argument.get_match_length_of_arguments():
|
|
1939
1952
|
required_length = argument.get_required_length()
|
|
1940
|
-
|
|
1941
|
-
|
|
1942
|
-
|
|
1943
|
-
MessageCodes.INVALID_LIST_LENGTH).format(lang_name,
|
|
1944
|
-
required_length),
|
|
1945
|
-
MessageCodes.INVALID_LIST_LENGTH)
|
|
1953
|
+
|
|
1954
|
+
_Validators._valid_list_length(arg_value=arg_value, arg_name=lang_name,
|
|
1955
|
+
required_length=required_length)
|
|
1946
1956
|
|
|
1947
1957
|
# Perform the checks which are specific to argument(_AnlyFuncArgument) type.
|
|
1948
1958
|
# Check lower bound and upper bound for numeric arguments.
|
|
@@ -2271,12 +2281,9 @@ class _StoredProcedureExecutor(_UAFFunctionExecutor):
|
|
|
2271
2281
|
# a list and of the required size.
|
|
2272
2282
|
if argument.get_match_length_of_arguments():
|
|
2273
2283
|
required_length = argument.get_required_length()
|
|
2274
|
-
|
|
2275
|
-
|
|
2276
|
-
|
|
2277
|
-
MessageCodes.INVALID_LIST_LENGTH).format(lang_name,
|
|
2278
|
-
required_length),
|
|
2279
|
-
MessageCodes.INVALID_LIST_LENGTH)
|
|
2284
|
+
|
|
2285
|
+
_Validators._valid_list_length(arg_value=arg_value, arg_name=lang_name,
|
|
2286
|
+
required_length=required_length)
|
|
2280
2287
|
|
|
2281
2288
|
# Perform the checks which are specific to argument(_AnlyFuncArgument) type.
|
|
2282
2289
|
# Check lower bound and upper bound for numeric arguments.
|
|
@@ -4,7 +4,7 @@ from teradataml.analytics.byom.PMMLPredict import PMMLPredict
|
|
|
4
4
|
from teradataml.analytics.meta_class import _AnalyticFunction
|
|
5
5
|
from teradataml.analytics.meta_class import _common_init, _common_dir
|
|
6
6
|
|
|
7
|
-
_byom_functions = ['H2OPredict', 'PMMLPredict', 'ONNXPredict', 'DataikuPredict', 'DataRobotPredict']
|
|
7
|
+
_byom_functions = ['H2OPredict', 'PMMLPredict', 'ONNXPredict', 'DataikuPredict', 'DataRobotPredict', 'ONNXEmbeddings']
|
|
8
8
|
|
|
9
9
|
for func in _byom_functions:
|
|
10
10
|
globals()[func] = type("{}".format(func), (_AnalyticFunction,),
|
|
@@ -1701,6 +1701,10 @@ class _AnlyFuncArgumentUAF(_AnlyFuncArgumentBaseUAF):
|
|
|
1701
1701
|
py_type = _Dtypes._anly_json_type_to_python_type(self.__data_type)
|
|
1702
1702
|
py_types = py_type if isinstance(py_type, tuple) else (py_type,)
|
|
1703
1703
|
|
|
1704
|
+
# If argument is float and int is not in the list of valid types, then add int to the list.
|
|
1705
|
+
if float in py_types and int not in py_types:
|
|
1706
|
+
py_types = (int,) + py_types
|
|
1707
|
+
|
|
1704
1708
|
# If lists are allowed, add list type also.
|
|
1705
1709
|
if self.__allows_lists and (list not in py_types):
|
|
1706
1710
|
py_types = py_types + (list,)
|
|
@@ -5,7 +5,19 @@ from teradataml.analytics.meta_class import _AnalyticFunction
|
|
|
5
5
|
from teradataml.analytics.meta_class import _common_init, _common_dir
|
|
6
6
|
from teradataml.analytics.json_parser.utils import _get_associated_parent_classes
|
|
7
7
|
|
|
8
|
-
_sqle_functions = [
|
|
8
|
+
_sqle_functions = [
|
|
9
|
+
'AIAnalyzeSentiment',
|
|
10
|
+
'AITextTranslate',
|
|
11
|
+
'AIDetectLanguage',
|
|
12
|
+
'AITextClassifier',
|
|
13
|
+
'AIAskLLM',
|
|
14
|
+
'AITextEmbeddings',
|
|
15
|
+
'AITextSummarize',
|
|
16
|
+
'AIExtractKeyPhrases',
|
|
17
|
+
'AIRecognizePIIEntities',
|
|
18
|
+
'AIMaskPII',
|
|
19
|
+
'AIRecognizeEntities',
|
|
20
|
+
'ANOVA',
|
|
9
21
|
'Antiselect',
|
|
10
22
|
'Attribution',
|
|
11
23
|
'BincodeFit',
|
|
@@ -95,7 +107,13 @@ _sqle_functions = ['ANOVA',
|
|
|
95
107
|
'WordEmbeddings',
|
|
96
108
|
'XGBoost',
|
|
97
109
|
'XGBoostPredict',
|
|
98
|
-
'ZTest'
|
|
110
|
+
'ZTest',
|
|
111
|
+
'HNSW',
|
|
112
|
+
'HNSWPredict',
|
|
113
|
+
'HNSWSummary',
|
|
114
|
+
'TextMorph',
|
|
115
|
+
'NERExtractor',
|
|
116
|
+
'Apriori'
|
|
99
117
|
]
|
|
100
118
|
|
|
101
119
|
for func in _sqle_functions:
|
teradataml/analytics/utils.py
CHANGED
|
@@ -411,6 +411,9 @@ class FuncSpecialCaseHandler():
|
|
|
411
411
|
self._single_quote_arg = lambda arg_value: "'{0}'".format(arg_value)
|
|
412
412
|
# Quote "arg_value" when value is 'NONE'.
|
|
413
413
|
self._single_quote_arg_value_NONE = lambda arg_value: "'{0}'".format(arg_value) if arg_value == 'NONE' else arg_value
|
|
414
|
+
# Remove quotes from "arg_value".
|
|
415
|
+
self._remove_quotes = lambda arg_value, *args, **kwargs: arg_value.replace("'", "") if isinstance(arg_value, str) else arg_value
|
|
416
|
+
|
|
414
417
|
|
|
415
418
|
# Initialize special function handle dictionary.
|
|
416
419
|
self.__handlers = {"Antiselect": {"exclude": self._enclose_square_brackets_add_quote},
|
|
@@ -455,7 +458,18 @@ class FuncSpecialCaseHandler():
|
|
|
455
458
|
"mode": self._single_quote_arg},
|
|
456
459
|
"Matrix2Image": {"type": self._single_quote_arg,
|
|
457
460
|
"colormap": self._single_quote_arg
|
|
458
|
-
}
|
|
461
|
+
},
|
|
462
|
+
"AIAnalyzeSentiment": {"authorization": self._remove_quotes},
|
|
463
|
+
"AITextTranslate": {"authorization": self._remove_quotes},
|
|
464
|
+
"AITextSummarize": {"authorization": self._remove_quotes},
|
|
465
|
+
"AI_TextEmbeddings": {"authorization": self._remove_quotes},
|
|
466
|
+
"AITextClassifier": {"authorization": self._remove_quotes},
|
|
467
|
+
"AIRecognizePIIEntities": {"authorization": self._remove_quotes},
|
|
468
|
+
"AIRecognizeEntities": {"authorization": self._remove_quotes},
|
|
469
|
+
"AIMaskPII:": {"authorization": self._remove_quotes},
|
|
470
|
+
"AIExtractKeyPhrases": {"authorization": self._remove_quotes},
|
|
471
|
+
"AIDetectLanguage": {"authorization": self._remove_quotes},
|
|
472
|
+
"AIAskLLM": {"authorization": self._remove_quotes}
|
|
459
473
|
}
|
|
460
474
|
|
|
461
475
|
# Setter method for argument.
|
teradataml/analytics/valib.py
CHANGED
|
@@ -26,6 +26,8 @@ from teradataml.dataframe.dataframe import DataFrame, in_schema
|
|
|
26
26
|
from teradataml.utils.validators import _Validators
|
|
27
27
|
from teradataml.analytics.Transformations import Binning, Derive, OneHotEncoder, FillNa, \
|
|
28
28
|
LabelEncoder, MinMaxScalar, Retain, Sigmoid, ZScore
|
|
29
|
+
from teradataml.common.constants import TeradataReservedKeywords, TeradataConstants
|
|
30
|
+
|
|
29
31
|
|
|
30
32
|
class _VALIB():
|
|
31
33
|
""" An internal class for executing VALIB analytic functions. """
|
|
@@ -370,9 +372,16 @@ class _VALIB():
|
|
|
370
372
|
self.__get_temp_table_name()
|
|
371
373
|
"""
|
|
372
374
|
prefix = "valib_{}".format(self.__tdml_valib_name.lower())
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
375
|
+
tbl_name = UtilFuncs._generate_temp_table_name(prefix=prefix, use_default_database=True,
|
|
376
|
+
gc_on_quit=True, quote=False,
|
|
377
|
+
table_type=TeradataConstants.TERADATA_TABLE)
|
|
378
|
+
# With VT option, table name is getting generated with 'vt_'.
|
|
379
|
+
# But its not getting created as Volatile table. Hence
|
|
380
|
+
# explicitly garbage collecting.
|
|
381
|
+
if configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE:
|
|
382
|
+
GarbageCollector._add_to_garbagecollector(tbl_name,
|
|
383
|
+
TeradataConstants.TERADATA_TABLE)
|
|
384
|
+
return tbl_name
|
|
376
385
|
|
|
377
386
|
def __process_dyn_cls_output_member(self, arg_name, out_tablename, out_var=None):
|
|
378
387
|
"""
|
|
@@ -447,6 +456,7 @@ class _VALIB():
|
|
|
447
456
|
# Add extension to the table name.
|
|
448
457
|
generated_table_name = "{}{}".format(table_name, extension)
|
|
449
458
|
|
|
459
|
+
|
|
450
460
|
# Register new output table to the GC.
|
|
451
461
|
gc_tabname = "\"{}\".\"{}\"".format(self.__db_name, generated_table_name)
|
|
452
462
|
GarbageCollector._add_to_garbagecollector(gc_tabname, TeradataConstants.TERADATA_TABLE)
|
|
@@ -1463,7 +1473,7 @@ class _VALIB():
|
|
|
1463
1473
|
if gen_sql_only:
|
|
1464
1474
|
valib_inst.__generate_valib_sql_argument_syntax(arg=str(gen_sql_only),
|
|
1465
1475
|
arg_name="gensqlonly")
|
|
1466
|
-
|
|
1476
|
+
charset = kwargs.pop("charset", None)
|
|
1467
1477
|
# Raise error if there are additional arguments.
|
|
1468
1478
|
if len(kwargs) != 0:
|
|
1469
1479
|
err_ = "The keyword arguments for Overlap() should have data1, data2, ..., dataN " \
|
|
@@ -1478,6 +1488,10 @@ class _VALIB():
|
|
|
1478
1488
|
arg_name="tablename")
|
|
1479
1489
|
valib_inst.__generate_valib_sql_argument_syntax(arg=",".join(column_names_df),
|
|
1480
1490
|
arg_name="columns")
|
|
1491
|
+
# Generate clause of charset.
|
|
1492
|
+
if charset:
|
|
1493
|
+
valib_inst.__generate_valib_sql_argument_syntax(arg=charset,
|
|
1494
|
+
arg_name="charset")
|
|
1481
1495
|
|
|
1482
1496
|
return valib_inst._execute_valib_function(skip_data_arg_processing=True,
|
|
1483
1497
|
skip_other_arg_processing=True)
|