teradataml 20.0.0.6__py3-none-any.whl → 20.0.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/README.md +210 -0
- teradataml/__init__.py +1 -1
- teradataml/_version.py +1 -1
- teradataml/analytics/analytic_function_executor.py +162 -76
- teradataml/analytics/byom/__init__.py +1 -1
- teradataml/analytics/json_parser/__init__.py +2 -0
- teradataml/analytics/json_parser/analytic_functions_argument.py +95 -2
- teradataml/analytics/json_parser/metadata.py +22 -4
- teradataml/analytics/sqle/DecisionTreePredict.py +3 -2
- teradataml/analytics/sqle/NaiveBayesPredict.py +3 -2
- teradataml/analytics/sqle/__init__.py +3 -0
- teradataml/analytics/utils.py +4 -1
- teradataml/automl/__init__.py +2369 -464
- teradataml/automl/autodataprep/__init__.py +15 -0
- teradataml/automl/custom_json_utils.py +184 -112
- teradataml/automl/data_preparation.py +113 -58
- teradataml/automl/data_transformation.py +154 -53
- teradataml/automl/feature_engineering.py +113 -53
- teradataml/automl/feature_exploration.py +548 -25
- teradataml/automl/model_evaluation.py +260 -32
- teradataml/automl/model_training.py +399 -206
- teradataml/clients/auth_client.py +2 -2
- teradataml/common/aed_utils.py +11 -2
- teradataml/common/bulk_exposed_utils.py +4 -2
- teradataml/common/constants.py +62 -2
- teradataml/common/garbagecollector.py +50 -21
- teradataml/common/messagecodes.py +47 -2
- teradataml/common/messages.py +19 -1
- teradataml/common/sqlbundle.py +23 -6
- teradataml/common/utils.py +116 -10
- teradataml/context/aed_context.py +16 -10
- teradataml/data/Employee.csv +5 -0
- teradataml/data/Employee_Address.csv +4 -0
- teradataml/data/Employee_roles.csv +5 -0
- teradataml/data/JulesBelvezeDummyData.csv +100 -0
- teradataml/data/byom_example.json +5 -0
- teradataml/data/creditcard_data.csv +284618 -0
- teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
- teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +1 -1
- teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
- teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +3 -7
- teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
- teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
- teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
- teradataml/data/load_example_data.py +29 -11
- teradataml/data/payment_fraud_dataset.csv +10001 -0
- teradataml/data/teradataml_example.json +67 -0
- teradataml/dataframe/copy_to.py +714 -54
- teradataml/dataframe/dataframe.py +1153 -33
- teradataml/dataframe/dataframe_utils.py +8 -3
- teradataml/dataframe/functions.py +168 -1
- teradataml/dataframe/setop.py +4 -1
- teradataml/dataframe/sql.py +141 -9
- teradataml/dbutils/dbutils.py +470 -35
- teradataml/dbutils/filemgr.py +1 -1
- teradataml/hyperparameter_tuner/optimizer.py +456 -142
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/lib/libaed_0_1.dylib +0 -0
- teradataml/lib/libaed_0_1.so +0 -0
- teradataml/lib/libaed_0_1_aarch64.so +0 -0
- teradataml/scriptmgmt/UserEnv.py +234 -34
- teradataml/scriptmgmt/lls_utils.py +43 -17
- teradataml/sdk/_json_parser.py +1 -1
- teradataml/sdk/api_client.py +9 -6
- teradataml/sdk/modelops/_client.py +3 -0
- teradataml/series/series.py +12 -7
- teradataml/store/feature_store/constants.py +601 -234
- teradataml/store/feature_store/feature_store.py +2886 -616
- teradataml/store/feature_store/mind_map.py +639 -0
- teradataml/store/feature_store/models.py +5831 -214
- teradataml/store/feature_store/utils.py +390 -0
- teradataml/table_operators/table_operator_util.py +1 -1
- teradataml/table_operators/templates/dataframe_register.template +6 -2
- teradataml/table_operators/templates/dataframe_udf.template +6 -2
- teradataml/utils/docstring.py +527 -0
- teradataml/utils/dtypes.py +93 -0
- teradataml/utils/internal_buffer.py +2 -2
- teradataml/utils/utils.py +41 -2
- teradataml/utils/validators.py +694 -17
- {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/METADATA +213 -2
- {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/RECORD +96 -81
- {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/zip-safe +0 -0
teradataml/README.md
CHANGED
|
@@ -17,6 +17,216 @@ Copyright 2025, Teradata. All Rights Reserved.
|
|
|
17
17
|
|
|
18
18
|
## Release Notes:
|
|
19
19
|
|
|
20
|
+
#### teradataml 20.00.00.07
|
|
21
|
+
* ##### New Features/Functionality
|
|
22
|
+
* ###### teradataml: DataFrame
|
|
23
|
+
* `DataFrame.df_type` - Added new property `df_type` to know the type of the DataFrame.
|
|
24
|
+
* `DataFrame.as_of()` - Added new function which supports temporal time qualifiers on teradataml DataFrame.
|
|
25
|
+
* `DataFrame.closed_rows()` - Added a new function to retrieve closed rows from a DataFrame created on a transaction-time or bi-temporal table/view.
|
|
26
|
+
* `DataFrame.open_rows()` - Added a new function to retrieve open rows from a DataFrame created on a transaction-time or bi-temporal table/view.
|
|
27
|
+
* `DataFrame.historic_rows()` - Added a new function to retrieve historical rows from a DataFrame created on a valid-time or bi-temporal table/view.
|
|
28
|
+
* `DataFrame.future_rows()` - Added a new function to retrieve future rows from a DataFrame created on a valid-time or bi-temporal table/view.
|
|
29
|
+
* `DataFrame.create_view()` - Creates a view from the DataFrame object. This function helps the user to persist the DataFrame as a view, which can be used across sessions.
|
|
30
|
+
* Added argument `persist` to `DataFrame.from_dict()`, `DataFrame.from_pandas()`, and `DataFrame.from_records()` to persist the created DataFrame.
|
|
31
|
+
|
|
32
|
+
* ###### teradataml DataFrameColumn a.k.a. ColumnExpression
|
|
33
|
+
* `DataFrameColumn.begin()` - Function to get beginning date or timestamp from a PERIOD column.
|
|
34
|
+
* `DataFrameColumn.end()` - Function to get ending date or timestamp from a PERIOD column.
|
|
35
|
+
* `DataFrameColumn.between()` - Function to check if the column value is between the lower and upper bounds.
|
|
36
|
+
|
|
37
|
+
* ###### teradataml: Functions
|
|
38
|
+
* `current_date()` - Gets the current date based on the specified time zone.
|
|
39
|
+
* `current_timestamp()` - Gets the current timestamp based on the specified time zone.
|
|
40
|
+
|
|
41
|
+
* ###### teradataml: General Functions
|
|
42
|
+
* Data Transfer Utility
|
|
43
|
+
* `copy_to_sql()`
|
|
44
|
+
* A new argument `partition_by` partitions the index while writing to Teradata Vantage.
|
|
45
|
+
* A new argument `partition_by_case` handles different cases for partitioning the index while writing to Teradata Vantage.
|
|
46
|
+
* A new argument `partition_by_range` partitions the data based on a range while writing to Teradata Vantage.
|
|
47
|
+
* A new argument `sub_partition` subpartitions the main partition according to the provided value.
|
|
48
|
+
* New keyword arguments `valid_time_columns` and `derived_column` helps to copy the data into temporal tables.
|
|
49
|
+
|
|
50
|
+
* ###### Enterprise Feature Store
|
|
51
|
+
* `FeatureStore` - Main class for managing Feature Store operations with comprehensive methods and properties.
|
|
52
|
+
* Methods:
|
|
53
|
+
* `apply()` - Adds Feature, Entity, DataSource, FeatureGroup to FeatureStore.
|
|
54
|
+
* `archive_data_source()` - Archives a specified DataSource.
|
|
55
|
+
* `archive_entity()` - Archives a specified Entity.
|
|
56
|
+
* `archive_feature()` - Archives a specified Feature.
|
|
57
|
+
* `archive_feature_group()` - Archives a specified FeatureGroup.
|
|
58
|
+
* `archive_feature_process()` - Archives a specified FeatureProcess.
|
|
59
|
+
* `delete()` - Deletes the FeatureStore and all its components.
|
|
60
|
+
* `delete_data_source()` - Deletes an archived DataSource.
|
|
61
|
+
* `delete_entity()` - Deletes an archived Entity.
|
|
62
|
+
* `delete_feature()` - Deletes an archived Feature.
|
|
63
|
+
* `delete_feature_group()` - Deletes an archived FeatureGroup.
|
|
64
|
+
* `delete_feature_process()` - Deletes an archived FeatureProcess.
|
|
65
|
+
* `get_data()` - Gets data based on features, entities, and processes.
|
|
66
|
+
* `get_data_domain()` - Retrieves DataDomain object.
|
|
67
|
+
* `get_data_source()` - Gets DataSources associated with FeatureStore.
|
|
68
|
+
* `get_dataset_catalog()` - Retrieves the DatasetCatalog object.
|
|
69
|
+
* `get_entity()` - Gets Entity associated with FeatureStore.
|
|
70
|
+
* `get_feature()` - Gets Feature associated with FeatureStore.
|
|
71
|
+
* `get_feature_group()` - Gets FeatureGroup associated with FeatureStore.
|
|
72
|
+
* `get_feature_process()` - Retrieves FeatureProcess based on arguments.
|
|
73
|
+
* `get_feature_catalog()` - Retrieves FeatureCatalog object.
|
|
74
|
+
* `get_group_features()` - Gets features from a specific feature group.
|
|
75
|
+
* `list_data_sources()` - Lists DataSources in the FeatureStore.
|
|
76
|
+
* `list_entities()` - Lists Entities in the FeatureStore.
|
|
77
|
+
* `list_feature_groups()` - Lists FeatureGroups in the FeatureStore.
|
|
78
|
+
* `list_features()` - Lists Features in the FeatureStore.
|
|
79
|
+
* `list_feature_processes()` - Lists all feature processes in the repo.
|
|
80
|
+
* `list_feature_runs()` - Lists feature process runs and execution status.
|
|
81
|
+
* `list_feature_catalogs()` - Lists all feature catalogs in the repo.
|
|
82
|
+
* `list_data_domains()` - Lists all data domains in the repo.
|
|
83
|
+
* `list_dataset_catalogs()` - Lists all dataset catalogs in the repo.
|
|
84
|
+
* `list_repos()` - Lists available repos configured for FeatureStore.
|
|
85
|
+
* `mind_map()` - Generates a mind map visualization of the feature store structure.
|
|
86
|
+
* `remove_data_domain()` - Removes the data domain from the feature store.
|
|
87
|
+
* `repair()` - Repairs the underlying FeatureStore schema on database.
|
|
88
|
+
* `set_features_active()` - Marks Features as active.
|
|
89
|
+
* `set_features_inactive()` - Marks Features as inactive.
|
|
90
|
+
* `setup()` - Sets up the FeatureStore for a repository.
|
|
91
|
+
* Properties:
|
|
92
|
+
* `data_domain` - Gets or sets the data domain of feature store.
|
|
93
|
+
* `grant` - Grants access to the FeatureStore.
|
|
94
|
+
* `repo` - Gets or sets the repository name.
|
|
95
|
+
* `revoke` - Revokes access from the FeatureStore.
|
|
96
|
+
* `version` - Gets the version of the FeatureStore.
|
|
97
|
+
* `FeatureGroup` - Represents a group of features with methods and properties.
|
|
98
|
+
* Methods:
|
|
99
|
+
* `apply()` - Applies the feature group to objects.
|
|
100
|
+
* `from_DataFrame()` - Creates a FeatureGroup from a DataFrame.
|
|
101
|
+
* `from_query()` - Creates a FeatureGroup from a query.
|
|
102
|
+
* `ingest_features()` - Ingests features from the FeatureGroup into the FeatureStore.
|
|
103
|
+
* `remove_feature()` - Removes a feature from the FeatureGroup.
|
|
104
|
+
* `reset_labels()` - Resets the labels of the FeatureGroup.
|
|
105
|
+
* `set_labels()` - Sets the labels of the FeatureGroup.
|
|
106
|
+
* Properties:
|
|
107
|
+
* `features` - Gets the features in the FeatureGroup.
|
|
108
|
+
* `labels` - Gets or sets the labels of the FeatureGroup.
|
|
109
|
+
* `DataDomain` - Represents a data domain within the FeatureStore with properties.
|
|
110
|
+
* Properties:
|
|
111
|
+
* `entities` - Gets the entities in the data domain.
|
|
112
|
+
* `features` - Gets the features in the data domain.
|
|
113
|
+
* `processes` - Gets the feature processes in the data domain.
|
|
114
|
+
* `datasets` - Gets the datasets in the data domain.
|
|
115
|
+
* `FeatureCatalog` - Manages features within a specific data domain.
|
|
116
|
+
* Methods:
|
|
117
|
+
* `upload_features()` - Uploads features to the catalog.
|
|
118
|
+
* `list_features()` - Lists features in the catalog.
|
|
119
|
+
* `list_feature_versions()` - Lists feature versions in the catalog.
|
|
120
|
+
* `archive_features()` - Archives features in the catalog.
|
|
121
|
+
* `delete_features()` - Deletes features from the catalog.
|
|
122
|
+
* Properties:
|
|
123
|
+
* `data_domain` - Gets the data domain of the catalog.
|
|
124
|
+
* `features` - Gets the features in the catalog.
|
|
125
|
+
* `entities` - Gets the entities in the catalog.
|
|
126
|
+
* `DatasetCatalog` - Manages datasets within a specific data domain.
|
|
127
|
+
* Methods:
|
|
128
|
+
* `build_dataset()` - Builds a dataset from features and entities.
|
|
129
|
+
* `build_time_series()` - Builds a time series dataset.
|
|
130
|
+
* `list_datasets()` - Lists datasets in the catalog.
|
|
131
|
+
* `list_entities()` - Lists entities available for dataset building.
|
|
132
|
+
* `list_features()` - Lists features available for dataset building.
|
|
133
|
+
* `get_dataset()` - Gets a specific dataset by ID.
|
|
134
|
+
* `archive_datasets()` - Archives datasets in the catalog.
|
|
135
|
+
* `delete_datasets()` - Deletes datasets from the catalog.
|
|
136
|
+
* Properties:
|
|
137
|
+
* `data_domain` - Gets the data domain of the catalog.
|
|
138
|
+
* `Dataset` - Represents a specific dataset in the catalog.
|
|
139
|
+
* Properties:
|
|
140
|
+
* `features` - Gets the features in the dataset.
|
|
141
|
+
* `entity` - Gets the entity of the dataset.
|
|
142
|
+
* `view_name` - Gets the view name of the dataset.
|
|
143
|
+
* `id` - Gets the ID of the dataset.
|
|
144
|
+
* `FeatureProcess` - Represents a feature processing workflow.
|
|
145
|
+
* Methods:
|
|
146
|
+
* `run()` - Executes the feature process with optional filters and as_of parameters.
|
|
147
|
+
* Properties:
|
|
148
|
+
* `process_id` - Gets the process ID.
|
|
149
|
+
* `df` - Gets the DataFrame associated with the process.
|
|
150
|
+
* `features` - Gets the features in the process.
|
|
151
|
+
* `entity` - Gets the entity in the process.
|
|
152
|
+
* `data_domain` - Gets the data domain of the process.
|
|
153
|
+
* `filters` - Gets the filters applied to the process.
|
|
154
|
+
* `as_of` - Gets the as_of parameter of the process.
|
|
155
|
+
* `description` - Gets the description of the process.
|
|
156
|
+
* `start_time` - Gets the start time of the process.
|
|
157
|
+
* `end_time` - Gets the end time of the process.
|
|
158
|
+
* `status` - Gets the status of the process.
|
|
159
|
+
|
|
160
|
+
* ###### OpensourceML
|
|
161
|
+
* `td_sklearn` - Now supports input from OTF tables.
|
|
162
|
+
|
|
163
|
+
* ###### BYOM Function
|
|
164
|
+
* `ONNXSeq2Seq()` - Applies sequence-to-sequence model in Vantage that has been created outside Vantage and stored in ONNX format.
|
|
165
|
+
|
|
166
|
+
* ###### teradataml: AutoFraud (Automated Machine Learning - Fraud Detection)
|
|
167
|
+
`AutoFraud` is a special purpose AutoML pipeline designed for fraud detection tasks. It automates the end-to-end process of data preprocessing, feature engineering, model training, evaluation, and deployment to efficiently identify fraudulent activities.
|
|
168
|
+
* Methods:
|
|
169
|
+
* `__init__()` - Instantiates an object of AutoFraud.
|
|
170
|
+
* `fit()` - Performs fit on specified data and target column.
|
|
171
|
+
* `leaderboard()` - Gets the leaderboard for the AutoFraud pipeline, with diverse models, feature selection methods, and performance metrics.
|
|
172
|
+
* `leader()` - Shows best performing model and its details such as feature selection method and performance metrics.
|
|
173
|
+
* `predict()` - Performs prediction on the data using the best model or the model of user's choice from the leaderboard.
|
|
174
|
+
* `evaluate()` - Performs evaluation on the data using the best model or the model of user's choice from the leaderboard.
|
|
175
|
+
* `load()` - Loads the saved model from database.
|
|
176
|
+
* `deploy()` - Saves the trained model inside database.
|
|
177
|
+
* `remove_saved_model()` - Removes the saved model in database.
|
|
178
|
+
* `model_hyperparameters()` - Returns the hyperparameters of fitted or loaded models.
|
|
179
|
+
* `get_persisted_tables()` - Lists the persisted tables created during AutoFraud execution.
|
|
180
|
+
* `visualize()` - Generates visualizations to analyze and understand the underlying patterns in the data.
|
|
181
|
+
* `generate_custom_config()` - Generates custom config JSON file required for customized run of AutoFraud.
|
|
182
|
+
|
|
183
|
+
* ###### teradataml: AutoChurn (Automated Machine Learning - Churn Prediction)
|
|
184
|
+
`AutoChurn` is a special purpose AutoML pipeline for customer churn prediction. It automates the end-to-end process of data preprocessing, feature engineering, model training, evaluation, and deployment to efficiently identify customers likely to churn.
|
|
185
|
+
* Methods:
|
|
186
|
+
* `__init__()` - Instantiates an object of AutoChurn.
|
|
187
|
+
* `fit()` - Performs fit on specified data and target column.
|
|
188
|
+
* `leaderboard()` - Gets the leaderboard for the AutoChurn pipeline, with diverse models, feature selection methods, and performance metrics.
|
|
189
|
+
* `leader()` - Shows best performing model and its details such as feature selection method and performance metrics.
|
|
190
|
+
* `predict()` - Performs prediction on the data using the best model or the model of user's choice from the leaderboard.
|
|
191
|
+
* `evaluate()` - Performs evaluation on the data using the best model or the model of user's choice from the leaderboard.
|
|
192
|
+
* `load()` - Loads the saved model from database.
|
|
193
|
+
* `deploy()` - Saves the trained model inside database.
|
|
194
|
+
* `remove_saved_model()` - Removes the saved model in database.
|
|
195
|
+
* `model_hyperparameters()` - Returns the hyperparameters of fitted or loaded models.
|
|
196
|
+
* `get_persisted_tables()` - Lists the persisted tables created during AutoChurn execution.
|
|
197
|
+
* `visualize()` - Generates visualizations to analyze and understand the underlying patterns in the data.
|
|
198
|
+
* `generate_custom_config()` - Generates custom config JSON file required for customized run of AutoChurn.
|
|
199
|
+
|
|
200
|
+
* ###### teradataml: AutoCluster (Automated Machine Learning - Clustering)
|
|
201
|
+
`AutoCluster` is a special purpose AutoML pipeline for clustering analysis. It automates the end-to-end process of data preprocessing, feature engineering, model training, and prediction to efficiently group data into clusters and extract insights from unlabeled datasets.
|
|
202
|
+
* Methods:
|
|
203
|
+
* `__init__()` - Instantiates an object of AutoCluster.
|
|
204
|
+
* `fit()` - Performs fit on specified data.
|
|
205
|
+
* `leaderboard()` - Gets the leaderboard for the AutoCluster pipeline, with diverse models, feature selection methods, and performance metrics.
|
|
206
|
+
* `leader()` - Shows best performing model and its details such as feature selection method and performance metrics.
|
|
207
|
+
* `predict()` - Performs prediction (cluster assignment) on the data using the best model or the model of user's choice from the leaderboard.
|
|
208
|
+
* `model_hyperparameters()` - Returns the hyperparameters of fitted or loaded models.
|
|
209
|
+
* `get_persisted_tables()` - Lists the persisted tables created during AutoCluster execution.
|
|
210
|
+
* `generate_custom_config()` - Generates custom config JSON file required for customized run of AutoCluster.
|
|
211
|
+
|
|
212
|
+
* ##### Updates
|
|
213
|
+
* ###### teradataml: Functions
|
|
214
|
+
* `udf()` - Added support for `td_buffer` to cache the data in the user defined function.
|
|
215
|
+
|
|
216
|
+
* ###### Open Analytics Framework (OpenAF)
|
|
217
|
+
* UserEnv Class.
|
|
218
|
+
* Properties:
|
|
219
|
+
* `models` - Supports listing of models installed from external model registry like HuggingFace as well.
|
|
220
|
+
* Methods:
|
|
221
|
+
* `install_model()` - Added new arguments `model_name`, `model_type`, and `api_key` to support installation of models from an external model registry like HuggingFace.
|
|
222
|
+
* `uninstall_model()` - Supports uninstallation of a model from the user environment which is installed from an external model registry like HuggingFace.
|
|
223
|
+
|
|
224
|
+
* ##### Bug Fixes
|
|
225
|
+
* `set_auth_token()` generates a JWT token using the default value for the iat claim when authentication is done using a PEM file and PAT.
|
|
226
|
+
* `create_env` - When an unavailable R base environment is provided in `create_env()`, requested R user environment is created using latest R base environment version
|
|
227
|
+
out of available base environments. Earlier, `create_env()` would create user environment with latest Python base environment version even though the request is for R user environment.
|
|
228
|
+
* Fixed `UserWarning` in `db_list_tables()`.
|
|
229
|
+
|
|
20
230
|
#### teradataml 20.00.00.06
|
|
21
231
|
* ##### New Features/Functionality
|
|
22
232
|
* ###### teradataml: SDK
|
teradataml/__init__.py
CHANGED
|
@@ -62,7 +62,7 @@ _TDML_DIRECTORY = os.path.dirname(v.__file__)
|
|
|
62
62
|
from teradataml.opensource import *
|
|
63
63
|
|
|
64
64
|
# Import AutoML
|
|
65
|
-
from teradataml.automl import AutoML, AutoRegressor, AutoClassifier
|
|
65
|
+
from teradataml.automl import AutoML, AutoRegressor, AutoClassifier, AutoChurn, AutoFraud, AutoCluster
|
|
66
66
|
from teradataml.automl.autodataprep import AutoDataPrep
|
|
67
67
|
|
|
68
68
|
# Import global variable representing session_queryband.
|
teradataml/_version.py
CHANGED
|
@@ -358,13 +358,7 @@ class _AnlyticFunctionExecutor:
|
|
|
358
358
|
def _process_other_argument(self, **kwargs):
|
|
359
359
|
"""
|
|
360
360
|
DESCRIPTION:
|
|
361
|
-
Function to process other arguments.
|
|
362
|
-
* Checks the required arguments are passed or not.
|
|
363
|
-
* Checks the type of the arguments are expected or not.
|
|
364
|
-
* If argument accepts only specified values, function checks whether
|
|
365
|
-
the value passed is in the specified values or not.
|
|
366
|
-
* If all the checks pass, it then populates the corresponding lists
|
|
367
|
-
with respective values.
|
|
361
|
+
Function to process other arguments.
|
|
368
362
|
|
|
369
363
|
PARAMETERS:
|
|
370
364
|
kwargs:
|
|
@@ -441,68 +435,165 @@ class _AnlyticFunctionExecutor:
|
|
|
441
435
|
|
|
442
436
|
# Let's process all other arguments.
|
|
443
437
|
for argument in self._metadata.arguments:
|
|
444
|
-
|
|
445
|
-
|
|
446
|
-
|
|
447
|
-
|
|
448
|
-
|
|
449
|
-
|
|
450
|
-
|
|
438
|
+
# If 'regexMatch' field is True in the JSON, extract all the
|
|
439
|
+
# arguments which follows the regex pattern specified in 'name'
|
|
440
|
+
# and 'rName' field.
|
|
441
|
+
if argument.regex_match():
|
|
442
|
+
m_name = argument.match_name()
|
|
443
|
+
a_name = argument.get_lang_name()
|
|
444
|
+
|
|
445
|
+
arg_names = argument.get_regex_matched_arguments(a_name,
|
|
446
|
+
**kwargs)
|
|
447
|
+
# If matchName is None, the SQL names remain the same as the
|
|
448
|
+
# Python names. Otherwise, the SQL names are replaced with
|
|
449
|
+
# those whose sql_name starts with the specified matching name.
|
|
450
|
+
if not m_name:
|
|
451
|
+
sql_names = arg_names
|
|
452
|
+
else:
|
|
453
|
+
sql_names = argument.get_regex_sql_name(argument.get_sql_name(),
|
|
454
|
+
m_name,
|
|
455
|
+
arg_names)
|
|
456
|
+
|
|
457
|
+
for a_name, s_name in zip(arg_names, sql_names):
|
|
458
|
+
arg_value = kwargs.get(a_name)
|
|
459
|
+
seq_inp_by = self._process_other_arguments_and_get_sequence_input_by_arg(
|
|
460
|
+
argument, a_name, s_name, arg_value, **kwargs)
|
|
461
|
+
if seq_inp_by:
|
|
462
|
+
sequence_input_by_list.append(seq_inp_by)
|
|
463
|
+
else:
|
|
464
|
+
sql_name = argument.get_sql_name()
|
|
465
|
+
arg_name = argument.get_lang_name()
|
|
466
|
+
arg_value = kwargs.get(arg_name)
|
|
451
467
|
|
|
452
|
-
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
456
|
-
|
|
457
|
-
|
|
458
|
-
|
|
459
|
-
|
|
460
|
-
|
|
461
|
-
|
|
462
|
-
|
|
463
|
-
|
|
464
|
-
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
478
|
-
|
|
479
|
-
|
|
480
|
-
|
|
481
|
-
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
485
|
-
|
|
486
|
-
|
|
487
|
-
|
|
488
|
-
|
|
489
|
-
|
|
490
|
-
|
|
491
|
-
|
|
492
|
-
|
|
493
|
-
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
|
|
500
|
-
|
|
501
|
-
|
|
502
|
-
|
|
503
|
-
|
|
504
|
-
|
|
505
|
-
|
|
468
|
+
seq_inp_by = self._process_other_arguments_and_get_sequence_input_by_arg(
|
|
469
|
+
argument, arg_name, sql_name, arg_value, **kwargs)
|
|
470
|
+
|
|
471
|
+
if seq_inp_by:
|
|
472
|
+
sequence_input_by_list.append(seq_inp_by)
|
|
473
|
+
|
|
474
|
+
if sequence_input_by_list:
|
|
475
|
+
self._func_other_arg_sql_names.append("SequenceInputBy")
|
|
476
|
+
sequence_input_by_arg_value = UtilFuncs._teradata_collapse_arglist(sequence_input_by_list, "'")
|
|
477
|
+
self._func_other_args.append(sequence_input_by_arg_value)
|
|
478
|
+
self._func_other_arg_json_datatypes.append("STRING")
|
|
479
|
+
self._sql_specific_attributes["SequenceInputBy"] = sequence_input_by_arg_value
|
|
480
|
+
|
|
481
|
+
def _process_other_arguments_and_get_sequence_input_by_arg(self, argument, arg_name, sql_name, arg_value, **kwargs):
|
|
482
|
+
"""
|
|
483
|
+
DESCRIPTION:
|
|
484
|
+
Function to process the arguments on below checks and get the other arguments.
|
|
485
|
+
This function does the following:
|
|
486
|
+
* Checks the required arguments are passed or not.
|
|
487
|
+
* Checks the type of the arguments are expected or not.
|
|
488
|
+
* If argument accepts only specified values, function checks whether
|
|
489
|
+
the value passed is in the specified values or not.
|
|
490
|
+
* If all the checks pass, it then populates the corresponding lists
|
|
491
|
+
with respective values.
|
|
492
|
+
|
|
493
|
+
PARAMETERS:
|
|
494
|
+
argument:
|
|
495
|
+
Required Argument.
|
|
496
|
+
Specifies information about analytic function argument.
|
|
497
|
+
Types: teradataml.analytics.json_parser.analytic_functions_argument._AnlyFuncArgument
|
|
498
|
+
|
|
499
|
+
arg_name:
|
|
500
|
+
Required Argument.
|
|
501
|
+
Specifies python name of argument.
|
|
502
|
+
Types: str
|
|
503
|
+
|
|
504
|
+
sql_name:
|
|
505
|
+
Required Argument.
|
|
506
|
+
Specifies SQL name of argument.
|
|
507
|
+
Types: str
|
|
508
|
+
|
|
509
|
+
arg_value:
|
|
510
|
+
Required Argument.
|
|
511
|
+
Specifies value of argument.
|
|
512
|
+
Types: datatype provided in the JSON
|
|
513
|
+
|
|
514
|
+
kwargs:
|
|
515
|
+
Specifies the keyword arguments passed to a function.
|
|
516
|
+
|
|
517
|
+
RETURNS:
|
|
518
|
+
str
|
|
519
|
+
|
|
520
|
+
RAISES:
|
|
521
|
+
ValueError OR TypeError OR TeradataMlException.
|
|
522
|
+
|
|
523
|
+
EXAMPLES:
|
|
524
|
+
arg = _AnlyFuncArgument(sql_name="sql_name_param",
|
|
525
|
+
is_required=True,
|
|
526
|
+
sql_description="sql_description_param",
|
|
527
|
+
lang_description="lang_description_param",
|
|
528
|
+
lang_name="lang_name_param",
|
|
529
|
+
use_in_r=False,
|
|
530
|
+
r_order_num=5,
|
|
531
|
+
datatype="int")
|
|
532
|
+
self._process_other_arguments_and_get_sequence_input_by_arg(
|
|
533
|
+
arg, "lang_name_param", "sql_name_param", 2,
|
|
534
|
+
const_num=2, cost_min_len=20)
|
|
535
|
+
"""
|
|
536
|
+
seq_inp_by = None
|
|
537
|
+
|
|
538
|
+
# Set the "argument".
|
|
539
|
+
self._spl_func_obj.set_arg_name(argument)
|
|
540
|
+
# Let's get spl handler if function requires.
|
|
541
|
+
special_case_handler = self._spl_func_obj._get_handle()
|
|
542
|
+
|
|
543
|
+
self._validate_analytic_function_argument(arg_name, arg_value, argument)
|
|
544
|
+
|
|
545
|
+
# Extract column names if it is a Feature.
|
|
546
|
+
arg_value = self._get_column_name_from_feature(arg_value)
|
|
547
|
+
|
|
548
|
+
# Perform the checks which are specific to argument(_AnlyFuncArgument) type.
|
|
549
|
+
# Check lower bound and upper bound for number type of arguments.
|
|
550
|
+
if isinstance(arg_value, (int, float)):
|
|
551
|
+
lower_bound_inclusive = argument.get_lower_bound_type() == "INCLUSIVE"
|
|
552
|
+
upper_bound_inclusive = argument.get_upper_bound_type() == "INCLUSIVE"
|
|
553
|
+
_Validators._validate_argument_range(arg_value,
|
|
554
|
+
arg_name,
|
|
555
|
+
lbound=argument.get_lower_bound(),
|
|
556
|
+
ubound=argument.get_upper_bound(),
|
|
557
|
+
lbound_inclusive=lower_bound_inclusive,
|
|
558
|
+
ubound_inclusive=upper_bound_inclusive)
|
|
559
|
+
|
|
560
|
+
if argument.is_column_argument() and not argument.get_target_table():
|
|
561
|
+
raise TeradataMlException(
|
|
562
|
+
Messages.get_message(MessageCodes.INVALID_JSON, "{}.json".format(self._metadata.sql_function_name),
|
|
563
|
+
"Argument '{}' is specified as column argument but "
|
|
564
|
+
"is Target table is not specified".format(sql_name)), MessageCodes.INVALID_JSON)
|
|
565
|
+
|
|
566
|
+
if argument.is_column_argument() and argument.get_target_table():
|
|
567
|
+
|
|
568
|
+
target_table_argument_name = argument.get_target_table_lang_name()
|
|
569
|
+
dataframe = kwargs.get(target_table_argument_name)
|
|
570
|
+
# Input table can be an object of MLE Functions too.
|
|
571
|
+
if not self._is_argument_dataframe(dataframe) and dataframe is not None:
|
|
572
|
+
dataframe = dataframe._mlresults[0]
|
|
573
|
+
|
|
574
|
+
# Validate column is existed or not in the table.
|
|
575
|
+
_Validators._validate_dataframe_has_argument_columns(
|
|
576
|
+
arg_value, arg_name, dataframe, target_table_argument_name, case_insensitive=True)
|
|
577
|
+
|
|
578
|
+
# Append square brackets for column range when function
|
|
579
|
+
# does not require special case handler.
|
|
580
|
+
arg_value = self._spl_func_obj._add_square_bracket(arg_value)
|
|
581
|
+
|
|
582
|
+
# Check if there are columns with non-ASCII characters.
|
|
583
|
+
if UtilFuncs._is_non_ascii(arg_value):
|
|
584
|
+
arg_value = UtilFuncs._teradata_quote_arg(arg_value, "\"", False)
|
|
585
|
+
# Handling special case for Teradata reserved keywords or column names with spaces.
|
|
586
|
+
# If argument is a string or list of strings, then add quotes to the string.
|
|
587
|
+
elif arg_name not in ["partition_columns"] and ( \
|
|
588
|
+
UtilFuncs._contains_space(arg_value) or list_td_reserved_keywords(arg_value)):
|
|
589
|
+
arg_value = UtilFuncs._teradata_quote_arg(arg_value, "\"", False)
|
|
590
|
+
|
|
591
|
+
# SequenceInputBy arguments require special processing.
|
|
592
|
+
if 500 <= argument.get_r_order_number() <= 510:
|
|
593
|
+
|
|
594
|
+
quoted_value = UtilFuncs._teradata_collapse_arglist(arg_value, "")
|
|
595
|
+
seq_inp_by = "{}:{}".format(sql_name, quoted_value)
|
|
596
|
+
else:
|
|
506
597
|
|
|
507
598
|
if arg_value is not None and arg_value != argument.get_default_value():
|
|
508
599
|
|
|
@@ -511,19 +602,14 @@ class _AnlyticFunctionExecutor:
|
|
|
511
602
|
|
|
512
603
|
# Handle special cases for arg_values based on function handler.
|
|
513
604
|
arg_value = special_case_handler(arg_value, self._quote_collapse_other_args) \
|
|
514
|
-
|
|
515
|
-
|
|
605
|
+
if special_case_handler is not None \
|
|
606
|
+
else self._quote_collapse_other_args(argument, arg_value)
|
|
516
607
|
|
|
517
608
|
self._func_other_arg_sql_names.append(sql_name)
|
|
518
609
|
self._func_other_args.append(arg_value)
|
|
519
610
|
self._func_other_arg_json_datatypes.append(argument.get_data_type())
|
|
520
611
|
|
|
521
|
-
|
|
522
|
-
self._func_other_arg_sql_names.append("SequenceInputBy")
|
|
523
|
-
sequence_input_by_arg_value = UtilFuncs._teradata_collapse_arglist(sequence_input_by_list, "'")
|
|
524
|
-
self._func_other_args.append(sequence_input_by_arg_value)
|
|
525
|
-
self._func_other_arg_json_datatypes.append("STRING")
|
|
526
|
-
self._sql_specific_attributes["SequenceInputBy"] = sequence_input_by_arg_value
|
|
612
|
+
return seq_inp_by
|
|
527
613
|
|
|
528
614
|
def _create_dynamic_class(self):
|
|
529
615
|
"""
|
|
@@ -4,7 +4,7 @@ from teradataml.analytics.byom.PMMLPredict import PMMLPredict
|
|
|
4
4
|
from teradataml.analytics.meta_class import _AnalyticFunction
|
|
5
5
|
from teradataml.analytics.meta_class import _common_init, _common_dir
|
|
6
6
|
|
|
7
|
-
_byom_functions = ['H2OPredict', 'PMMLPredict', 'ONNXPredict', 'DataikuPredict', 'DataRobotPredict', 'ONNXEmbeddings']
|
|
7
|
+
_byom_functions = ['H2OPredict', 'PMMLPredict', 'ONNXPredict', 'DataikuPredict', 'DataRobotPredict', 'ONNXEmbeddings', 'ONNXSeq2Seq']
|
|
8
8
|
|
|
9
9
|
for func in _byom_functions:
|
|
10
10
|
globals()[func] = type("{}".format(func), (_AnalyticFunction,),
|