teradataml 20.0.0.5__py3-none-any.whl → 20.0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.



Files changed (119)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +306 -0
  3. teradataml/__init__.py +1 -1
  4. teradataml/_version.py +1 -1
  5. teradataml/analytics/analytic_function_executor.py +162 -76
  6. teradataml/analytics/byom/__init__.py +1 -1
  7. teradataml/analytics/json_parser/__init__.py +2 -0
  8. teradataml/analytics/json_parser/analytic_functions_argument.py +95 -2
  9. teradataml/analytics/json_parser/metadata.py +22 -4
  10. teradataml/analytics/sqle/DecisionTreePredict.py +3 -2
  11. teradataml/analytics/sqle/NaiveBayesPredict.py +3 -2
  12. teradataml/analytics/sqle/__init__.py +3 -0
  13. teradataml/analytics/utils.py +59 -11
  14. teradataml/automl/__init__.py +2369 -464
  15. teradataml/automl/autodataprep/__init__.py +15 -0
  16. teradataml/automl/custom_json_utils.py +184 -112
  17. teradataml/automl/data_preparation.py +113 -58
  18. teradataml/automl/data_transformation.py +154 -53
  19. teradataml/automl/feature_engineering.py +113 -53
  20. teradataml/automl/feature_exploration.py +548 -25
  21. teradataml/automl/model_evaluation.py +260 -32
  22. teradataml/automl/model_training.py +399 -206
  23. teradataml/clients/auth_client.py +10 -6
  24. teradataml/clients/keycloak_client.py +165 -0
  25. teradataml/common/aed_utils.py +11 -2
  26. teradataml/common/bulk_exposed_utils.py +4 -2
  27. teradataml/common/constants.py +72 -2
  28. teradataml/common/exceptions.py +32 -0
  29. teradataml/common/garbagecollector.py +50 -21
  30. teradataml/common/messagecodes.py +73 -1
  31. teradataml/common/messages.py +27 -1
  32. teradataml/common/sqlbundle.py +25 -7
  33. teradataml/common/utils.py +210 -22
  34. teradataml/context/aed_context.py +16 -10
  35. teradataml/context/context.py +37 -9
  36. teradataml/data/Employee.csv +5 -0
  37. teradataml/data/Employee_Address.csv +4 -0
  38. teradataml/data/Employee_roles.csv +5 -0
  39. teradataml/data/JulesBelvezeDummyData.csv +100 -0
  40. teradataml/data/byom_example.json +5 -0
  41. teradataml/data/creditcard_data.csv +284618 -0
  42. teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
  43. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +1 -1
  44. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +1 -1
  45. teradataml/data/docs/sqle/docs_17_20/TextParser.py +1 -1
  46. teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
  47. teradataml/data/jsons/byom/onnxembeddings.json +1 -0
  48. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +3 -7
  49. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +3 -7
  50. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +3 -7
  51. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +3 -7
  52. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +3 -7
  53. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +3 -7
  54. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +3 -7
  55. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +3 -7
  56. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +3 -7
  57. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +3 -7
  58. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +3 -7
  59. teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
  60. teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
  61. teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
  62. teradataml/data/load_example_data.py +29 -11
  63. teradataml/data/pattern_matching_data.csv +11 -0
  64. teradataml/data/payment_fraud_dataset.csv +10001 -0
  65. teradataml/data/sdk/modelops/modelops_spec.json +101737 -0
  66. teradataml/data/teradataml_example.json +75 -1
  67. teradataml/data/url_data.csv +10 -9
  68. teradataml/dataframe/copy_to.py +715 -55
  69. teradataml/dataframe/dataframe.py +2115 -97
  70. teradataml/dataframe/dataframe_utils.py +66 -28
  71. teradataml/dataframe/functions.py +1130 -2
  72. teradataml/dataframe/setop.py +4 -1
  73. teradataml/dataframe/sql.py +710 -1039
  74. teradataml/dbutils/dbutils.py +470 -35
  75. teradataml/dbutils/filemgr.py +1 -1
  76. teradataml/hyperparameter_tuner/optimizer.py +456 -142
  77. teradataml/hyperparameter_tuner/utils.py +4 -2
  78. teradataml/lib/aed_0_1.dll +0 -0
  79. teradataml/lib/libaed_0_1.dylib +0 -0
  80. teradataml/lib/libaed_0_1.so +0 -0
  81. teradataml/lib/libaed_0_1_aarch64.so +0 -0
  82. teradataml/opensource/_base.py +7 -1
  83. teradataml/options/configure.py +20 -4
  84. teradataml/scriptmgmt/UserEnv.py +247 -36
  85. teradataml/scriptmgmt/lls_utils.py +140 -39
  86. teradataml/sdk/README.md +79 -0
  87. teradataml/sdk/__init__.py +4 -0
  88. teradataml/sdk/_auth_modes.py +422 -0
  89. teradataml/sdk/_func_params.py +487 -0
  90. teradataml/sdk/_json_parser.py +453 -0
  91. teradataml/sdk/_openapi_spec_constants.py +249 -0
  92. teradataml/sdk/_utils.py +236 -0
  93. teradataml/sdk/api_client.py +900 -0
  94. teradataml/sdk/constants.py +62 -0
  95. teradataml/sdk/modelops/__init__.py +98 -0
  96. teradataml/sdk/modelops/_client.py +409 -0
  97. teradataml/sdk/modelops/_constants.py +304 -0
  98. teradataml/sdk/modelops/models.py +2308 -0
  99. teradataml/sdk/spinner.py +107 -0
  100. teradataml/series/series.py +12 -7
  101. teradataml/store/feature_store/constants.py +601 -234
  102. teradataml/store/feature_store/feature_store.py +2886 -616
  103. teradataml/store/feature_store/mind_map.py +639 -0
  104. teradataml/store/feature_store/models.py +5831 -214
  105. teradataml/store/feature_store/utils.py +390 -0
  106. teradataml/table_operators/query_generator.py +4 -21
  107. teradataml/table_operators/table_operator_util.py +1 -1
  108. teradataml/table_operators/templates/dataframe_register.template +6 -2
  109. teradataml/table_operators/templates/dataframe_udf.template +6 -2
  110. teradataml/utils/docstring.py +527 -0
  111. teradataml/utils/dtypes.py +95 -1
  112. teradataml/utils/internal_buffer.py +2 -2
  113. teradataml/utils/utils.py +41 -3
  114. teradataml/utils/validators.py +699 -18
  115. {teradataml-20.0.0.5.dist-info → teradataml-20.0.0.7.dist-info}/METADATA +312 -2
  116. {teradataml-20.0.0.5.dist-info → teradataml-20.0.0.7.dist-info}/RECORD +119 -87
  117. {teradataml-20.0.0.5.dist-info → teradataml-20.0.0.7.dist-info}/WHEEL +0 -0
  118. {teradataml-20.0.0.5.dist-info → teradataml-20.0.0.7.dist-info}/top_level.txt +0 -0
  119. {teradataml-20.0.0.5.dist-info → teradataml-20.0.0.7.dist-info}/zip-safe +0 -0
teradataml/LICENSE-3RD-PARTY.pdf CHANGED
Binary file
teradataml/README.md CHANGED
@@ -17,6 +17,312 @@ Copyright 2025, Teradata. All Rights Reserved.
 
  ## Release Notes:
 
+ #### teradataml 20.00.00.07
+ * ##### New Features/Functionality
+ * ###### teradataml: DataFrame
+ * `DataFrame.df_type` - Added new property `df_type` to get the type of the DataFrame.
+ * `DataFrame.as_of()` - Added a new function that supports temporal time qualifiers on a teradataml DataFrame (see the sketch after this list).
+ * `DataFrame.closed_rows()` - Added a new function to retrieve closed rows from a DataFrame created on a transaction-time or bi-temporal table/view.
+ * `DataFrame.open_rows()` - Added a new function to retrieve open rows from a DataFrame created on a transaction-time or bi-temporal table/view.
+ * `DataFrame.historic_rows()` - Added a new function to retrieve historical rows from a DataFrame created on a valid-time or bi-temporal table/view.
+ * `DataFrame.future_rows()` - Added a new function to retrieve future rows from a DataFrame created on a valid-time or bi-temporal table/view.
+ * `DataFrame.create_view()` - Creates a view from the DataFrame object. This function lets the user persist the DataFrame as a view that can be used across sessions.
+ * Added argument `persist` to `DataFrame.from_dict()`, `DataFrame.from_pandas()`, and `DataFrame.from_records()` to persist the created DataFrame.
+
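A minimal sketch of the temporal helpers listed above. The table name and the `as_of()` argument form are assumptions for illustration, not taken from the release notes:

```python
from teradataml import DataFrame

df = DataFrame("policy_history")   # hypothetical bi-temporal table
print(df.df_type)                  # New property: type of the DataFrame.

open_df = df.open_rows()           # Rows still open in transaction time.
closed_df = df.closed_rows()       # Rows already closed in transaction time.
past_df = df.historic_rows()       # Historical rows in valid time.
future_df = df.future_rows()       # Future rows in valid time.
snap = df.as_of("2024-01-01")      # Temporal qualifier; argument form assumed.
```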
+ * ###### teradataml DataFrameColumn a.k.a. ColumnExpression
+ * `DataFrameColumn.begin()` - Function to get the beginning date or timestamp of a PERIOD column.
+ * `DataFrameColumn.end()` - Function to get the ending date or timestamp of a PERIOD column.
+ * `DataFrameColumn.between()` - Function to check whether the column value is between the lower and upper bounds (see the sketch after this list).
+
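A short sketch of these PERIOD-column helpers, assuming `df` has a PERIOD column `validity` and a numeric column `amount` (both column names are hypothetical):

```python
bounds = df.assign(starts=df.validity.begin(),  # beginning bound of the PERIOD
                   ends=df.validity.end())      # ending bound of the PERIOD
mid_range = df[df.amount.between(100, 500)]     # lower/upper bound filter
```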
+ * ###### teradataml: Functions
+ * `current_date()` - Gets the current date based on the specified time zone (see the sketch after this list).
+ * `current_timestamp()` - Gets the current timestamp based on the specified time zone.
+
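A sketch of the new function-style helpers; the import path and the use inside `assign()` are assumptions based on how other teradataml functions are exposed:

```python
from teradataml.dataframe.functions import current_date, current_timestamp  # path assumed

stamped = df.assign(today=current_date(),     # current date for the session time zone
                    now=current_timestamp())  # current timestamp for the session time zone
```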
+ * ###### teradataml: General Functions
+ * Data Transfer Utility
+ * `copy_to_sql()`
+ * A new argument `partition_by` partitions the index while writing to Teradata Vantage.
+ * A new argument `partition_by_case` handles different cases for partitioning the index while writing to Teradata Vantage.
+ * A new argument `partition_by_range` partitions the data based on a range while writing to Teradata Vantage.
+ * A new argument `sub_partition` subpartitions the main partition according to the provided value.
+ * New keyword arguments `valid_time_columns` and `derived_column` help to copy the data into temporal tables (see the sketch after this list).
+
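A hedged sketch of the new `copy_to_sql()` partitioning arguments. The exact value formats each argument accepts are not spelled out in the notes, so the values below are illustrative only:

```python
import pandas as pd
from teradataml import copy_to_sql

pdf = pd.DataFrame({"region": ["N", "S"], "amount": [10.5, 20.0]})
copy_to_sql(df=pdf,
            table_name="sales_partitioned",
            partition_by="region",   # partition the index on a column; form assumed
            sub_partition=4)         # hypothetical sub-partitioning value
```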
+ * ###### Enterprise Feature Store
+ * `FeatureStore` - Main class for managing Feature Store operations with comprehensive methods and properties (see the workflow sketch after this list).
+ * Methods:
+ * `apply()` - Adds Feature, Entity, DataSource, FeatureGroup to FeatureStore.
+ * `archive_data_source()` - Archives a specified DataSource.
+ * `archive_entity()` - Archives a specified Entity.
+ * `archive_feature()` - Archives a specified Feature.
+ * `archive_feature_group()` - Archives a specified FeatureGroup.
+ * `archive_feature_process()` - Archives a specified FeatureProcess.
+ * `delete()` - Deletes the FeatureStore and all its components.
+ * `delete_data_source()` - Deletes an archived DataSource.
+ * `delete_entity()` - Deletes an archived Entity.
+ * `delete_feature()` - Deletes an archived Feature.
+ * `delete_feature_group()` - Deletes an archived FeatureGroup.
+ * `delete_feature_process()` - Deletes an archived FeatureProcess.
+ * `get_data()` - Gets data based on features, entities, and processes.
+ * `get_data_domain()` - Retrieves the DataDomain object.
+ * `get_data_source()` - Gets DataSources associated with FeatureStore.
+ * `get_dataset_catalog()` - Retrieves the DatasetCatalog object.
+ * `get_entity()` - Gets Entity associated with FeatureStore.
+ * `get_feature()` - Gets Feature associated with FeatureStore.
+ * `get_feature_group()` - Gets FeatureGroup associated with FeatureStore.
+ * `get_feature_process()` - Retrieves a FeatureProcess based on the given arguments.
+ * `get_feature_catalog()` - Retrieves the FeatureCatalog object.
+ * `get_group_features()` - Gets features from a specific feature group.
+ * `list_data_sources()` - Lists DataSources in the FeatureStore.
+ * `list_entities()` - Lists Entities in the FeatureStore.
+ * `list_feature_groups()` - Lists FeatureGroups in the FeatureStore.
+ * `list_features()` - Lists Features in the FeatureStore.
+ * `list_feature_processes()` - Lists all feature processes in the repo.
+ * `list_feature_runs()` - Lists feature process runs and execution status.
+ * `list_feature_catalogs()` - Lists all feature catalogs in the repo.
+ * `list_data_domains()` - Lists all data domains in the repo.
+ * `list_dataset_catalogs()` - Lists all dataset catalogs in the repo.
+ * `list_repos()` - Lists available repos configured for FeatureStore.
+ * `mind_map()` - Generates a mind map visualization of the feature store structure.
+ * `remove_data_domain()` - Removes the data domain from the feature store.
+ * `repair()` - Repairs the underlying FeatureStore schema on the database.
+ * `set_features_active()` - Marks Features as active.
+ * `set_features_inactive()` - Marks Features as inactive.
+ * `setup()` - Sets up the FeatureStore for a repository.
+ * Properties:
+ * `data_domain` - Gets or sets the data domain of the feature store.
+ * `grant` - Grants access to the FeatureStore.
+ * `repo` - Gets or sets the repository name.
+ * `revoke` - Revokes access from the FeatureStore.
+ * `version` - Gets the version of the FeatureStore.
+ * `FeatureGroup` - Represents a group of features with methods and properties.
+ * Methods:
+ * `apply()` - Applies the feature group to objects.
+ * `from_DataFrame()` - Creates a FeatureGroup from a DataFrame.
+ * `from_query()` - Creates a FeatureGroup from a query.
+ * `ingest_features()` - Ingests features from the FeatureGroup into the FeatureStore.
+ * `remove_feature()` - Removes a feature from the FeatureGroup.
+ * `reset_labels()` - Resets the labels of the FeatureGroup.
+ * `set_labels()` - Sets the labels of the FeatureGroup.
+ * Properties:
+ * `features` - Gets the features in the FeatureGroup.
+ * `labels` - Gets or sets the labels of the FeatureGroup.
+ * `DataDomain` - Represents a data domain within the FeatureStore with properties.
+ * Properties:
+ * `entities` - Gets the entities in the data domain.
+ * `features` - Gets the features in the data domain.
+ * `processes` - Gets the feature processes in the data domain.
+ * `datasets` - Gets the datasets in the data domain.
+ * `FeatureCatalog` - Manages features within a specific data domain.
+ * Methods:
+ * `upload_features()` - Uploads features to the catalog.
+ * `list_features()` - Lists features in the catalog.
+ * `list_feature_versions()` - Lists feature versions in the catalog.
+ * `archive_features()` - Archives features in the catalog.
+ * `delete_features()` - Deletes features from the catalog.
+ * Properties:
+ * `data_domain` - Gets the data domain of the catalog.
+ * `features` - Gets the features in the catalog.
+ * `entities` - Gets the entities in the catalog.
+ * `DatasetCatalog` - Manages datasets within a specific data domain.
+ * Methods:
+ * `build_dataset()` - Builds a dataset from features and entities.
+ * `build_time_series()` - Builds a time series dataset.
+ * `list_datasets()` - Lists datasets in the catalog.
+ * `list_entities()` - Lists entities available for dataset building.
+ * `list_features()` - Lists features available for dataset building.
+ * `get_dataset()` - Gets a specific dataset by ID.
+ * `archive_datasets()` - Archives datasets in the catalog.
+ * `delete_datasets()` - Deletes datasets from the catalog.
+ * Properties:
+ * `data_domain` - Gets the data domain of the catalog.
+ * `Dataset` - Represents a specific dataset in the catalog.
+ * Properties:
+ * `features` - Gets the features in the dataset.
+ * `entity` - Gets the entity of the dataset.
+ * `view_name` - Gets the view name of the dataset.
+ * `id` - Gets the ID of the dataset.
+ * `FeatureProcess` - Represents a feature processing workflow.
+ * Methods:
+ * `run()` - Executes the feature process with optional filters and as_of parameters.
+ * Properties:
+ * `process_id` - Gets the process ID.
+ * `df` - Gets the DataFrame associated with the process.
+ * `features` - Gets the features in the process.
+ * `entity` - Gets the entity in the process.
+ * `data_domain` - Gets the data domain of the process.
+ * `filters` - Gets the filters applied to the process.
+ * `as_of` - Gets the as_of parameter of the process.
+ * `description` - Gets the description of the process.
+ * `start_time` - Gets the start time of the process.
+ * `end_time` - Gets the end time of the process.
+ * `status` - Gets the status of the process.
+
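A minimal end-to-end sketch of the FeatureStore workflow described above. The repository name, source table, and `from_DataFrame()` argument names are assumptions for illustration:

```python
from teradataml import DataFrame, FeatureStore, FeatureGroup  # top-level imports assumed

fs = FeatureStore(repo="demo_repo")               # hypothetical repository
df = DataFrame("transactions")                    # hypothetical source table
fg = FeatureGroup.from_DataFrame(name="txn_fg",   # argument names assumed
                                 df=df,
                                 entity_columns="txn_id")
fs.apply(fg)        # Register the FeatureGroup (and its Features, Entity, DataSource).
fs.list_features()  # Browse what was registered.
```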
+ * ###### OpensourceML
+ * `td_sklearn` - Now supports input from OTF tables.
+
+ * ###### BYOM Function
+ * `ONNXSeq2Seq()` - Applies a sequence-to-sequence model, created outside Vantage and stored in ONNX format, in Vantage.
+
+ * ###### teradataml: AutoFraud (Automated Machine Learning - Fraud Detection)
+ `AutoFraud` is a special-purpose AutoML pipeline designed for fraud detection tasks. It automates the end-to-end process of data preprocessing, feature engineering, model training, evaluation, and deployment to efficiently identify fraudulent activities (see the usage sketch after this list).
+ * Methods:
+ * `__init__()` - Instantiates an object of AutoFraud.
+ * `fit()` - Performs fit on specified data and target column.
+ * `leaderboard()` - Gets the leaderboard for the AutoFraud pipeline, with diverse models, feature selection methods, and performance metrics.
+ * `leader()` - Shows the best performing model and its details such as feature selection method and performance metrics.
+ * `predict()` - Performs prediction on the data using the best model or the model of the user's choice from the leaderboard.
+ * `evaluate()` - Performs evaluation on the data using the best model or the model of the user's choice from the leaderboard.
+ * `load()` - Loads the saved model from the database.
+ * `deploy()` - Saves the trained model inside the database.
+ * `remove_saved_model()` - Removes the saved model from the database.
+ * `model_hyperparameters()` - Returns the hyperparameters of fitted or loaded models.
+ * `get_persisted_tables()` - Lists the persisted tables created during AutoFraud execution.
+ * `visualize()` - Generates visualizations to analyze and understand the underlying patterns in the data.
+ * `generate_custom_config()` - Generates the custom config JSON file required for a customized run of AutoFraud.
+
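A sketch of a basic AutoFraud run following the method list above; the table and target column are hypothetical. AutoChurn exposes the same interface, and AutoCluster differs mainly in fitting without a target column:

```python
from teradataml import AutoFraud, DataFrame

train = DataFrame("payment_fraud_train")        # hypothetical training table
af = AutoFraud()
af.fit(data=train, target_column="is_fraud")    # argument names assumed
af.leaderboard()                                # Compare the candidate models.
preds = af.predict(DataFrame("payment_fraud_test"))
af.deploy("fraud_model")                        # deploy() argument form assumed
```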
+ * ###### teradataml: AutoChurn (Automated Machine Learning - Churn Prediction)
+ `AutoChurn` is a special-purpose AutoML pipeline for customer churn prediction. It automates the end-to-end process of data preprocessing, feature engineering, model training, evaluation, and deployment to efficiently identify customers likely to churn.
+ * Methods:
+ * `__init__()` - Instantiates an object of AutoChurn.
+ * `fit()` - Performs fit on specified data and target column.
+ * `leaderboard()` - Gets the leaderboard for the AutoChurn pipeline, with diverse models, feature selection methods, and performance metrics.
+ * `leader()` - Shows the best performing model and its details such as feature selection method and performance metrics.
+ * `predict()` - Performs prediction on the data using the best model or the model of the user's choice from the leaderboard.
+ * `evaluate()` - Performs evaluation on the data using the best model or the model of the user's choice from the leaderboard.
+ * `load()` - Loads the saved model from the database.
+ * `deploy()` - Saves the trained model inside the database.
+ * `remove_saved_model()` - Removes the saved model from the database.
+ * `model_hyperparameters()` - Returns the hyperparameters of fitted or loaded models.
+ * `get_persisted_tables()` - Lists the persisted tables created during AutoChurn execution.
+ * `visualize()` - Generates visualizations to analyze and understand the underlying patterns in the data.
+ * `generate_custom_config()` - Generates the custom config JSON file required for a customized run of AutoChurn.
+
+ * ###### teradataml: AutoCluster (Automated Machine Learning - Clustering)
+ `AutoCluster` is a special-purpose AutoML pipeline for clustering analysis. It automates the end-to-end process of data preprocessing, feature engineering, model training, and prediction to efficiently group data into clusters and extract insights from unlabeled datasets.
+ * Methods:
+ * `__init__()` - Instantiates an object of AutoCluster.
+ * `fit()` - Performs fit on specified data.
+ * `leaderboard()` - Gets the leaderboard for the AutoCluster pipeline, with diverse models, feature selection methods, and performance metrics.
+ * `leader()` - Shows the best performing model and its details such as feature selection method and performance metrics.
+ * `predict()` - Performs prediction (cluster assignment) on the data using the best model or the model of the user's choice from the leaderboard.
+ * `model_hyperparameters()` - Returns the hyperparameters of fitted or loaded models.
+ * `get_persisted_tables()` - Lists the persisted tables created during AutoCluster execution.
+ * `generate_custom_config()` - Generates the custom config JSON file required for a customized run of AutoCluster.
+
+ * ##### Updates
+ * ###### teradataml: Functions
+ * `udf()` - Added support for `td_buffer` to cache the data in the user-defined function.
+
+ * ###### Open Analytics Framework (OpenAF)
+ * UserEnv Class.
+ * Properties:
+ * `models` - Supports listing of models installed from an external model registry, such as HuggingFace, as well.
+ * Methods:
+ * `install_model()` - Added new arguments `model_name`, `model_type` and `api_key` to support installation of models from an external model registry such as HuggingFace (see the sketch after this list).
+ * `uninstall_model()` - Supports uninstallation of a model installed from an external model registry such as HuggingFace from the user environment.
+
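A sketch of installing a registry model into an OpenAF user environment. The environment name, model identifier, and `model_type` value are all illustrative:

```python
from teradataml import get_env

env = get_env("demo_env")                                # hypothetical existing environment
env.install_model(model_name="distilbert-base-uncased",  # hypothetical model id
                  model_type="huggingface",              # value form assumed
                  api_key="<registry_api_key>")
print(env.models)                               # Lists registry-installed models too.
env.uninstall_model("distilbert-base-uncased")  # argument form assumed
```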
+ * ##### Bug Fixes
+ * `set_auth_token()` now generates the JWT token using the default value for the iat claim when authentication is done using a PEM file and PAT.
+ * `create_env()` - When an unavailable R base environment is provided, the requested R user environment is now created using the latest R base environment version among the available base environments. Earlier, `create_env()` would create the user environment with the latest Python base environment version even though the request was for an R user environment.
+ * Fixed a UserWarning in `db_list_tables()`.
+
+ #### teradataml 20.00.00.06
+ * ##### New Features/Functionality
+ * ###### teradataml: SDK
+ * Added new client `teradataml.sdk.Client`, which users can use to make REST calls through the SDK.
+ * Added a new exception `TeradatamlRestException`, specifically for REST APIs, with an attribute `json_response` that provides a properly printable JSON response.
+ * Exposed three different ways of authentication through `Client`.
+ * Client credentials authentication through the `ClientCredentialsAuth` class.
+ * Device code authentication through the `DeviceCodeAuth` class.
+ * Bearer authentication through the `BearerAuth` class.
+
+ * ###### teradataml: ModelOps SDK
+ * `teradataml` exposes Python interfaces for all the REST APIs provided by Teradata Vantage ModelOps.
+ * Added support for the `blueprint()` method, which prints the available classes in the `modelops` module.
+ * Added new client `ModelOpsClient` with some additional functions compared to `teradataml.sdk.Client`.
+ * teradataml classes are added for the schemas in the ModelOps OpenAPI specification.
+ ```python
+ >>> from teradataml.sdk.modelops import ModelOpsClient, Projects
+ >>> from teradataml.common.exceptions import TeradatamlRestException
+ >>> from teradataml.sdk import DeviceCodeAuth, BearerAuth, ClientCredentialsAuth # Authentication related classes.
+ >>> from teradataml.sdk.modelops import models # All classes related to the OpenAPI schema are present in this module.
+
+ # Print available classes in the modelops module.
+ >>> from teradataml.sdk.modelops import blueprint
+ >>> blueprint()
+
+ # Create a ClientCredentialsAuth object and create a ModelOpsClient object.
+ >>> cc_obj = ClientCredentialsAuth(auth_client_id="<client_id>",
+                                    auth_client_secret="<client_secret>",
+                                    auth_token_url="https://<example.com>/token")
+ >>> client = ModelOpsClient(base_url="<base_url>", auth=cc_obj, ssl_verify=False)
+
+ # Create a Projects object.
+ >>> p = Projects(client=client)
+
+ # Create a project using the `body` argument, which takes an object of ProjectRequestBody.
+ >>> project_payload = {
+         "name": "dummy_project",
+         "description": "dummy_project created for testing",
+         "groupId": "<group_ID>",
+         "gitRepositoryUrl": "/app/built-in/empty",
+         "branch": "<branch>"
+     }
+ >>> p.create_project(body=models.ProjectRequestBody(**project_payload))
+ ```
+
+ * ###### teradataml: Functions
+ * `get_formatters()` - Gets the formatters for NUMERIC, DATE and CHAR types.
+
+ * ###### teradataml: DataFrame Methods
+ * `get_snapshot()` - Gets the snapshot data of a teradataml DataFrame created on an OTF table for a given snapshot id or timestamp.
+ * `from_pandas()` - Creates a teradataml DataFrame from a pandas DataFrame.
+ * `from_records()` - Creates a teradataml DataFrame from a list of records (see the sketch after this list).
+ * `from_dict()` - Creates a teradataml DataFrame from a dictionary.
+
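A sketch of the new constructors. The column data is illustrative, and any additional arguments (such as index or persistence options) are omitted:

```python
import pandas as pd
from teradataml import DataFrame

tdf1 = DataFrame.from_dict({"id": [1, 2], "name": ["a", "b"]})  # from a dictionary
tdf2 = DataFrame.from_pandas(pd.DataFrame({"id": [3, 4]}))      # from a pandas DataFrame
tdf3 = DataFrame.from_records([(5, "c"), (6, "d")])             # from a list of records
```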
+ * ###### teradataml: DataFrame Properties
+ * `history` - Returns snapshot history for a DataFrame created on an OTF table.
+ * `manifests` - Returns manifest information for a DataFrame created on an OTF table.
+ * `partitions` - Returns partition information for a DataFrame created on an OTF table.
+ * `snapshots` - Returns snapshot information for a DataFrame created on an OTF table.
+
+ * ###### teradataml DataFrameColumn a.k.a. ColumnExpression
+ * `DataFrameColumn.rlike()` - Function to match a string against a regular expression pattern.
+ * `DataFrameColumn.substring_index()` - Function to return the substring from a column before a specified delimiter, up to a given occurrence count.
+ * `DataFrameColumn.count_delimiters()` - Function to count the total number of occurrences of a specified delimiter (see the sketch after this list).
+
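A sketch of the new string helpers on a hypothetical `url` column; the argument order of `substring_index()` and `count_delimiters()` is inferred from the descriptions above:

```python
https_only = df[df.url.rlike("^https://")]             # regular-expression match
site = df.assign(site=df.url.substring_index("/", 3))  # text before the 3rd "/"
depth = df.assign(n_seg=df.url.count_delimiters("/"))  # number of "/" occurrences
```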
+ * ##### Updates
+ * ###### teradataml DataFrameColumn a.k.a. ColumnExpression
+ * `DataFrameColumn.like()`
+ * Added argument `escape_char` to specify the escape character for the LIKE pattern (see the sketch after this list).
+ * Argument `pattern` now accepts a DataFrameColumn as input.
+ * `DataFrameColumn.ilike()`
+ * Added argument `escape_char` to specify the escape character for the ILIKE pattern.
+ * Argument `pattern` now accepts a DataFrameColumn as input.
+ * `DataFrameColumn.parse_url()` - Added argument `key` to extract a specific query parameter when `url_part` is set to "QUERY".
+
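A sketch of `like()` and `ilike()` with an escape character, so that "%" in the pattern is matched literally; the column name and patterns are illustrative:

```python
# Match labels that literally end in "%", e.g. "50%".
literal_pct = df[df.label.like("%!%", escape_char="!")]
# ILIKE behaves the same way, case-insensitively.
literal_pct_ci = df[df.label.ilike("%!%", escape_char="!")]
```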
+ * ###### teradataml: DataFrame function
+ * `groupby()`, `cube()` and `rollup()`
+ * Added argument `include_grouping_columns` to include aggregations on the grouping column(s) (see the sketch after this list).
+ * `DataFrame()` - Added new argument `data` that accepts input data to create a teradataml DataFrame.
+
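A sketch of the new grouping option; that the flag defaults to False is an assumption:

```python
# Also aggregate over the grouping column itself, not just the other columns.
agg = df.groupby("region", include_grouping_columns=True).count()
```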
+ * ###### General functions
+ * `set_auth_token()`
+ * New keyword argument `auth_url` accepts the endpoint URL for a Keycloak server.
+ * New keyword argument `rest_client` accepts the name of the service for which the Keycloak token is to be generated.
+ * New keyword argument `validate_jwt` accepts a boolean flag that decides whether to validate the generated JWT token.
+ * New keyword argument `valid_from` accepts the epoch seconds representing the time from which the JWT token is valid.
+
+ * ###### teradataml Options
+ * Configuration Options
+ * `configure.use_short_object_name` - Specifies whether to use a shorter name for temporary database objects which are created by teradataml internally.
+
+ * ###### BYOM Function
+ * Supports special characters.
+
  #### teradataml 20.00.00.05
  * ##### New Features/Functionality
  * ##### teradataml: AutoML
teradataml/__init__.py CHANGED
@@ -62,7 +62,7 @@ _TDML_DIRECTORY = os.path.dirname(v.__file__)
  from teradataml.opensource import *
 
  # Import AutoML
- from teradataml.automl import AutoML, AutoRegressor, AutoClassifier
+ from teradataml.automl import AutoML, AutoRegressor, AutoClassifier, AutoChurn, AutoFraud, AutoCluster
  from teradataml.automl.autodataprep import AutoDataPrep
 
  # Import global variable representing session_queryband.
teradataml/_version.py CHANGED
@@ -8,4 +8,4 @@
  #
  # ##################################################################
 
- version = "20.00.00.05"
+ version = "20.00.00.07"
teradataml/analytics/analytic_function_executor.py CHANGED
@@ -358,13 +358,7 @@ class _AnlyticFunctionExecutor:
  def _process_other_argument(self, **kwargs):
  """
  DESCRIPTION:
- Function to process other arguments. This function does the following:
- * Checks the required arguments are passed or not.
- * Checks the type of the arguments are expected or not.
- * If argument accepts only specified values, function checks whether
- the value passed is in the specified values or not.
- * If all the checks pass, it then populates the corresponding lists
- with respective values.
+ Function to process other arguments.
 
  PARAMETERS:
  kwargs:
@@ -441,68 +435,165 @@ class _AnlyticFunctionExecutor:
 
  # Let's process all other arguments.
  for argument in self._metadata.arguments:
- sql_name = argument.get_sql_name()
- arg_name = argument.get_lang_name()
- arg_value = kwargs.get(arg_name)
- # Set the "argument".
- self._spl_func_obj.set_arg_name(argument)
- # Let's get spl handler if function requires.
- special_case_handler = self._spl_func_obj._get_handle()
+ # If 'regexMatch' field is True in the JSON, extract all the
+ # arguments which follows the regex pattern specified in 'name'
+ # and 'rName' field.
+ if argument.regex_match():
+ m_name = argument.match_name()
+ a_name = argument.get_lang_name()
+
+ arg_names = argument.get_regex_matched_arguments(a_name,
+ **kwargs)
+ # If matchName is None, the SQL names remain the same as the
+ # Python names. Otherwise, the SQL names are replaced with
+ # those whose sql_name starts with the specified matching name.
+ if not m_name:
+ sql_names = arg_names
+ else:
+ sql_names = argument.get_regex_sql_name(argument.get_sql_name(),
+ m_name,
+ arg_names)
+
+ for a_name, s_name in zip(arg_names, sql_names):
+ arg_value = kwargs.get(a_name)
+ seq_inp_by = self._process_other_arguments_and_get_sequence_input_by_arg(
+ argument, a_name, s_name, arg_value, **kwargs)
+ if seq_inp_by:
+ sequence_input_by_list.append(seq_inp_by)
+ else:
+ sql_name = argument.get_sql_name()
+ arg_name = argument.get_lang_name()
+ arg_value = kwargs.get(arg_name)
 
- self._validate_analytic_function_argument(arg_name, arg_value, argument)
-
- # Extract column names if it is a Feature.
- arg_value = self._get_column_name_from_feature(arg_value)
-
- # Perform the checks which are specific to argument(_AnlyFuncArgument) type.
- # Check lower bound and upper bound for number type of arguments.
- if isinstance(arg_value, (int, float)):
-
- lower_bound_inclusive = argument.get_lower_bound_type() == "INCLUSIVE"
- upper_bound_inclusive = argument.get_upper_bound_type() == "INCLUSIVE"
- _Validators._validate_argument_range(arg_value,
- arg_name,
- lbound=argument.get_lower_bound(),
- ubound=argument.get_upper_bound(),
- lbound_inclusive=lower_bound_inclusive,
- ubound_inclusive=upper_bound_inclusive)
-
- if argument.is_column_argument() and not argument.get_target_table():
- raise TeradataMlException(
- Messages.get_message(MessageCodes.INVALID_JSON, "{}.json".format(self._metadata.sql_function_name),
- "Argument '{}' is specified as column argument but "
- "is Target table is not specified".format(sql_name)), MessageCodes.INVALID_JSON)
-
- # Additional Validations if argument is a Column name.
- if argument.is_column_argument() and argument.get_target_table():
- target_table_argument_name = argument.get_target_table_lang_name()
- dataframe = kwargs.get(target_table_argument_name)
- # Input table can be an object of MLE Functions too.
- if not self._is_argument_dataframe(dataframe) and dataframe is not None:
- dataframe = dataframe._mlresults[0]
-
- # Validate column is existed or not in the table.
- _Validators._validate_dataframe_has_argument_columns(
- arg_value, arg_name, dataframe, target_table_argument_name, case_insensitive=True)
-
- # Append square brackets for column range when function
- # does not require special case handler.
- arg_value = self._spl_func_obj._add_square_bracket(arg_value)
-
- # Check if there are columns with non-ASCII characters.
- if UtilFuncs._is_ascii(arg_value):
- arg_value = UtilFuncs._teradata_quote_arg(arg_value, "\"", False)
- # Handling special case for Teradata reserved keywords or column names with spaces.
- # If argument is a string or list of strings, then add quotes to the string.
- elif arg_name not in ["partition_columns"] and (\
- UtilFuncs._contains_space(arg_value) or list_td_reserved_keywords(arg_value)):
- arg_value = UtilFuncs._teradata_quote_arg(arg_value, "\"", False)
-
- # SequenceInputBy arguments require special processing.
- if 500 <= argument.get_r_order_number() <= 510:
- quoted_value = UtilFuncs._teradata_collapse_arglist(arg_value, "")
- sequence_input_by_list.append("{}:{}".format(sql_name, quoted_value))
- continue
+ seq_inp_by = self._process_other_arguments_and_get_sequence_input_by_arg(
+ argument, arg_name, sql_name, arg_value, **kwargs)
+
+ if seq_inp_by:
+ sequence_input_by_list.append(seq_inp_by)
+
+ if sequence_input_by_list:
+ self._func_other_arg_sql_names.append("SequenceInputBy")
+ sequence_input_by_arg_value = UtilFuncs._teradata_collapse_arglist(sequence_input_by_list, "'")
+ self._func_other_args.append(sequence_input_by_arg_value)
+ self._func_other_arg_json_datatypes.append("STRING")
+ self._sql_specific_attributes["SequenceInputBy"] = sequence_input_by_arg_value
+
+ def _process_other_arguments_and_get_sequence_input_by_arg(self, argument, arg_name, sql_name, arg_value, **kwargs):
+ """
+ DESCRIPTION:
+ Function to process the arguments on below checks and get the other arguments.
+ This function does the following:
+ * Checks the required arguments are passed or not.
+ * Checks the type of the arguments are expected or not.
+ * If argument accepts only specified values, function checks whether
+ the value passed is in the specified values or not.
+ * If all the checks pass, it then populates the corresponding lists
+ with respective values.
+
+ PARAMETERS:
+ argument:
+ Required Argument.
+ Specifies information about analytic function argument.
+ Types: teradataml.analytics.json_parser.analytic_functions_argument._AnlyFuncArgument
+
+ arg_name:
+ Required Argument.
+ Specifies python name of argument.
+ Types: str
+
+ sql_name:
+ Required Argument.
+ Specifies SQL name of argument.
+ Types: str
+
+ arg_value:
+ Required Argument.
+ Specifies value of argument.
+ Types: datatype provided in the JSON
+
+ kwargs:
+ Specifies the keyword arguments passed to a function.
+
+ RETURNS:
+ str
+
+ RAISES:
+ ValueError OR TypeError OR TeradataMlException.
+
+ EXAMPLES:
+ arg = _AnlyFuncArgument(sql_name="sql_name_param",
+ is_required=True,
+ sql_description="sql_description_param",
+ lang_description="lang_description_param",
+ lang_name="lang_name_param",
+ use_in_r=False,
+ r_order_num=5,
+ datatype="int")
+ self._process_other_arguments_and_get_sequence_input_by_arg(
+ arg, "lang_name_param", "sql_name_param", 2,
+ const_num=2, cost_min_len=20)
+ """
+ seq_inp_by = None
+
+ # Set the "argument".
+ self._spl_func_obj.set_arg_name(argument)
+ # Let's get spl handler if function requires.
+ special_case_handler = self._spl_func_obj._get_handle()
+
+ self._validate_analytic_function_argument(arg_name, arg_value, argument)
+
+ # Extract column names if it is a Feature.
+ arg_value = self._get_column_name_from_feature(arg_value)
+
+ # Perform the checks which are specific to argument(_AnlyFuncArgument) type.
+ # Check lower bound and upper bound for number type of arguments.
+ if isinstance(arg_value, (int, float)):
+ lower_bound_inclusive = argument.get_lower_bound_type() == "INCLUSIVE"
+ upper_bound_inclusive = argument.get_upper_bound_type() == "INCLUSIVE"
+ _Validators._validate_argument_range(arg_value,
+ arg_name,
+ lbound=argument.get_lower_bound(),
+ ubound=argument.get_upper_bound(),
+ lbound_inclusive=lower_bound_inclusive,
+ ubound_inclusive=upper_bound_inclusive)
+
+ if argument.is_column_argument() and not argument.get_target_table():
+ raise TeradataMlException(
+ Messages.get_message(MessageCodes.INVALID_JSON, "{}.json".format(self._metadata.sql_function_name),
+ "Argument '{}' is specified as column argument but "
+ "is Target table is not specified".format(sql_name)), MessageCodes.INVALID_JSON)
+
+ if argument.is_column_argument() and argument.get_target_table():
+
+ target_table_argument_name = argument.get_target_table_lang_name()
+ dataframe = kwargs.get(target_table_argument_name)
+ # Input table can be an object of MLE Functions too.
+ if not self._is_argument_dataframe(dataframe) and dataframe is not None:
+ dataframe = dataframe._mlresults[0]
+
+ # Validate column is existed or not in the table.
+ _Validators._validate_dataframe_has_argument_columns(
+ arg_value, arg_name, dataframe, target_table_argument_name, case_insensitive=True)
+
+ # Append square brackets for column range when function
+ # does not require special case handler.
+ arg_value = self._spl_func_obj._add_square_bracket(arg_value)
+
+ # Check if there are columns with non-ASCII characters.
+ if UtilFuncs._is_non_ascii(arg_value):
+ arg_value = UtilFuncs._teradata_quote_arg(arg_value, "\"", False)
+ # Handling special case for Teradata reserved keywords or column names with spaces.
+ # If argument is a string or list of strings, then add quotes to the string.
+ elif arg_name not in ["partition_columns"] and ( \
+ UtilFuncs._contains_space(arg_value) or list_td_reserved_keywords(arg_value)):
+ arg_value = UtilFuncs._teradata_quote_arg(arg_value, "\"", False)
+
+ # SequenceInputBy arguments require special processing.
+ if 500 <= argument.get_r_order_number() <= 510:
+
+ quoted_value = UtilFuncs._teradata_collapse_arglist(arg_value, "")
+ seq_inp_by = "{}:{}".format(sql_name, quoted_value)
+ else:
 
  if arg_value is not None and arg_value != argument.get_default_value():
 
@@ -511,19 +602,14 @@
 
  # Handle special cases for arg_values based on function handler.
  arg_value = special_case_handler(arg_value, self._quote_collapse_other_args) \
- if special_case_handler is not None \
- else self._quote_collapse_other_args(argument, arg_value)
+ if special_case_handler is not None \
+ else self._quote_collapse_other_args(argument, arg_value)
 
  self._func_other_arg_sql_names.append(sql_name)
  self._func_other_args.append(arg_value)
  self._func_other_arg_json_datatypes.append(argument.get_data_type())
 
- if sequence_input_by_list:
- self._func_other_arg_sql_names.append("SequenceInputBy")
- sequence_input_by_arg_value = UtilFuncs._teradata_collapse_arglist(sequence_input_by_list, "'")
- self._func_other_args.append(sequence_input_by_arg_value)
- self._func_other_arg_json_datatypes.append("STRING")
- self._sql_specific_attributes["SequenceInputBy"] = sequence_input_by_arg_value
+ return seq_inp_by
 
  def _create_dynamic_class(self):
  """
teradataml/analytics/byom/__init__.py CHANGED
@@ -4,7 +4,7 @@ from teradataml.analytics.byom.PMMLPredict import PMMLPredict
  from teradataml.analytics.meta_class import _AnalyticFunction
  from teradataml.analytics.meta_class import _common_init, _common_dir
 
- _byom_functions = ['H2OPredict', 'PMMLPredict', 'ONNXPredict', 'DataikuPredict', 'DataRobotPredict', 'ONNXEmbeddings']
+ _byom_functions = ['H2OPredict', 'PMMLPredict', 'ONNXPredict', 'DataikuPredict', 'DataRobotPredict', 'ONNXEmbeddings', 'ONNXSeq2Seq']
 
  for func in _byom_functions:
  globals()[func] = type("{}".format(func), (_AnalyticFunction,),
teradataml/analytics/json_parser/__init__.py CHANGED
@@ -86,6 +86,8 @@ class JsonFields:
  OPERATOR = "operator"
  DEPENDENT_ARGUMENT_VALUE = "argumentValue"
  FUNCTION_ALIAS_NAME = "function_alias_name"
+ REGEX_MATCH = "regexMatch"
+ MATCH_NAME = "matchName"
 
 
  class SqleJsonFields(JsonFields):
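For orientation, a hypothetical sketch of how the two new JSON fields might appear in an analytic function's argument definition, expressed as a Python dict; the values are illustrative, not taken from a shipped JSON:

```python
argument_definition = {
    "name": "ModelColumn",       # SQL-side argument name
    "rName": "model_column",     # Python-side name, treated as a regex pattern
    "regexMatch": True,          # JsonFields.REGEX_MATCH: enable regex matching
    "matchName": "ModelColumn",  # JsonFields.MATCH_NAME: matched SQL names start with this
}
```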