teradataml 20.0.0.3__py3-none-any.whl → 20.0.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +119 -0
- teradataml/_version.py +1 -1
- teradataml/analytics/analytic_function_executor.py +18 -6
- teradataml/analytics/byom/__init__.py +1 -1
- teradataml/analytics/sqle/__init__.py +4 -1
- teradataml/analytics/valib.py +18 -4
- teradataml/automl/__init__.py +51 -6
- teradataml/automl/data_preparation.py +56 -33
- teradataml/automl/data_transformation.py +58 -33
- teradataml/automl/feature_engineering.py +12 -5
- teradataml/automl/model_training.py +34 -13
- teradataml/common/__init__.py +1 -2
- teradataml/common/constants.py +64 -40
- teradataml/common/messagecodes.py +13 -3
- teradataml/common/messages.py +4 -1
- teradataml/common/sqlbundle.py +40 -10
- teradataml/common/utils.py +113 -39
- teradataml/common/warnings.py +11 -0
- teradataml/context/context.py +141 -17
- teradataml/data/amazon_reviews_25.csv +26 -0
- teradataml/data/byom_example.json +11 -0
- teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
- teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
- teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
- teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
- teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
- teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/Shap.py +28 -6
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
- teradataml/data/hnsw_alter_data.csv +5 -0
- teradataml/data/hnsw_data.csv +10 -0
- teradataml/data/jsons/byom/h2opredict.json +1 -1
- teradataml/data/jsons/byom/onnxembeddings.json +266 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
- teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +2 -2
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +1 -1
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +5 -5
- teradataml/data/teradataml_example.json +8 -0
- teradataml/data/vectordistance_example.json +1 -1
- teradataml/dataframe/copy_to.py +8 -3
- teradataml/dataframe/data_transfer.py +11 -1
- teradataml/dataframe/dataframe.py +517 -121
- teradataml/dataframe/dataframe_utils.py +152 -20
- teradataml/dataframe/functions.py +26 -11
- teradataml/dataframe/setop.py +11 -6
- teradataml/dataframe/sql.py +2 -2
- teradataml/dbutils/dbutils.py +525 -129
- teradataml/hyperparameter_tuner/optimizer.py +12 -1
- teradataml/opensource/{sklearn/_sklearn_wrapper.py → _base.py} +317 -1011
- teradataml/opensource/_class.py +141 -17
- teradataml/opensource/{constants.py → _constants.py} +7 -3
- teradataml/opensource/_lightgbm.py +52 -53
- teradataml/opensource/_sklearn.py +1008 -0
- teradataml/opensource/_wrapper_utils.py +5 -5
- teradataml/options/__init__.py +47 -15
- teradataml/options/configure.py +103 -25
- teradataml/options/display.py +13 -2
- teradataml/plot/axis.py +47 -8
- teradataml/plot/figure.py +33 -0
- teradataml/plot/plot.py +63 -13
- teradataml/scriptmgmt/UserEnv.py +2 -2
- teradataml/scriptmgmt/lls_utils.py +63 -26
- teradataml/store/__init__.py +1 -2
- teradataml/store/feature_store/feature_store.py +102 -7
- teradataml/table_operators/Apply.py +32 -18
- teradataml/table_operators/Script.py +3 -1
- teradataml/table_operators/TableOperator.py +3 -1
- teradataml/utils/dtypes.py +47 -0
- teradataml/utils/internal_buffer.py +18 -0
- teradataml/utils/validators.py +68 -9
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.4.dist-info}/METADATA +123 -2
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.4.dist-info}/RECORD +79 -75
- teradataml/data/SQL_Fundamentals.pdf +0 -0
- teradataml/libaed_0_1.dylib +0 -0
- teradataml/libaed_0_1.so +0 -0
- teradataml/opensource/sklearn/__init__.py +0 -0
- teradataml/store/vector_store/__init__.py +0 -1586
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.4.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.4.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.4.dist-info}/zip-safe +0 -0
teradataml/scriptmgmt/lls_utils.py
CHANGED
@@ -74,6 +74,11 @@ def list_base_envs():
         5     r_4.0.2      R        4.0.2
         >>>
     """
+    # Check if the cache data is available and is not stale.
+    # If available, return the data.
+    if _InternalBuffer.get('list_base_envs') is not None:
+        return _InternalBuffer.get('list_base_envs')
+
     try:
         response = UtilFuncs._http_request(_get_ues_url("base_environments"), headers=_get_auth_token())
 
@@ -86,7 +91,8 @@ def list_base_envs():
             return
 
         # Create a pandas DataFrame from data.
-
+        _InternalBuffer.add(list_base_envs=pd.DataFrame.from_records(data))
+        return _InternalBuffer.get('list_base_envs')
 
     except (TeradataMlException, RuntimeError):
         raise
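The two list_base_envs() hunks above memoize the service response in _InternalBuffer so repeated calls skip the HTTP round trip. A minimal, self-contained sketch of that cache-then-populate pattern follows; _Buffer and _fetch_base_envs are illustrative stand-ins, and only the get()/add() calls mirror what the diff shows.

import pandas as pd


class _Buffer:
    """Illustrative stand-in for teradataml's _InternalBuffer (not the real class)."""
    _data = {}

    @classmethod
    def add(cls, **kwargs):
        cls._data.update(kwargs)

    @classmethod
    def get(cls, key):
        return cls._data.get(key)


def _fetch_base_envs():
    # Hypothetical placeholder for the UES HTTP call made by list_base_envs().
    return [{"base_name": "python_3.10", "language": "Python", "version": "3.10"}]


def list_base_envs():
    # Return the cached DataFrame when one is already present.
    if _Buffer.get("list_base_envs") is not None:
        return _Buffer.get("list_base_envs")
    # Otherwise fetch, cache, and return the freshly built DataFrame.
    _Buffer.add(list_base_envs=pd.DataFrame.from_records(_fetch_base_envs()))
    return _Buffer.get("list_base_envs")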
@@ -417,21 +423,20 @@ def __create_envs(template):
         # Install files if requested any.
         if files:
             print("Installing files in environment '{}'...".format(env_name))
-
-
-            files = [files]
+            if isinstance(files, str):
+                files = [files]
 
-
+            for file in files:
+                try:
                     if os.path.isfile(file):
                         env_handle.install_file(file)
                     elif os.path.isdir(file):
-                        __install_files(env_handle, file)
-
-
-
-
-
-                        pass
+                        errored = __install_files(env_handle, file)
+                except Exception as file_installation_failure:
+                    print("Failed to install file '{}' in environment '{}'.".format(file, env_name))
+                    print(str(file_installation_failure))
+                    errored = True
+                    pass
 
         # Install libraries if requested any.
         if libs or libs_file_path:
@@ -472,6 +477,10 @@ def __get_default_base_env():
     Function returns the latest python environment available with
     Open Analytics Framework.
     """
+    # Check if the default base environment is already available.
+    if _InternalBuffer.get('default_base_env') is not None:
+        return _InternalBuffer.get('default_base_env')
+
     try:
         base_envs = list_base_envs()
         python_versions = base_envs[base_envs.language == 'Python']['version']
@@ -481,7 +490,9 @@ def __get_default_base_env():
         latest_version_tuple = max(version_tuples)
         # Convert the latest version tuple back to a string
         latest_version = '.'.join(map(str, latest_version_tuple))
-
+        # Get the base environment name for the latest version
+        _InternalBuffer.add(default_base_env=base_envs[base_envs.version == latest_version]['base_name'].to_list()[0])
+        return _InternalBuffer.get('default_base_env')
     except Exception as base_env_err:
         raise Exception("Failed to obtain default base environment.", str(base_env_err.exception))
 
@@ -491,9 +502,20 @@ def __install_files(env, directory):
     Function to install files under given directory and
     all the subdirectories recursively.
    """
+    errored = False
    for (dir_path, dir_names, file_names) in os.walk(directory):
+        # install the files under all the directories.
+        # If any problem with any file installation, skip the error
+        # and proceed to install other files.
        for file_name in file_names:
-
+            try:
+                env.install_file(os.path.join(dir_path, file_name))
+            except Exception as file_installation_failure:
+                print("Failed to install file '{}' in environment '{}'.".format(file_name, env.env_name))
+                print(str(file_installation_failure))
+                errored = True
+
+    return errored
 
 
 @collect_queryband(queryband="CrtEnv")
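__install_files() above now walks the directory tree, keeps going after individual installation failures, and reports whether anything failed through a boolean return. A rough sketch of that walk-and-continue pattern; install_one stands in for env.install_file(), so the helper names here are assumptions rather than teradataml API.

import os


def install_tree(directory, install_one):
    """Install every file under `directory`; skip failures but remember that one occurred."""
    errored = False
    for dir_path, _dir_names, file_names in os.walk(directory):
        for file_name in file_names:
            path = os.path.join(dir_path, file_name)
            try:
                install_one(path)
            except Exception as exc:
                # Report the failure and continue with the remaining files.
                print("Failed to install file '{}': {}".format(path, exc))
                errored = True
    return errored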
@@ -751,9 +773,27 @@ def create_env(env_name=None, base_env=None, desc=None, template=None, conda_env
     # Get the latest python base env in OpenAF, if base_env is not provided,
     # Or if base_env is provided and not in the list of base envs.
     # Note: By default python base env is obtained.
-    if not
-    (
-    base_env
+    if configure.ues_url is not None and \
+            get_connection() is not None:
+        # Check if base_env is provided or not in the list of base envs.
+
+        # Check if user requested for conda environment but do not specify the base_env.
+        # In such case, set base_env to the default python base environment.
+        if conda_env:
+            if base_env is None:
+                base_env = __get_default_base_env()
+        # Not a conda environment.
+        else:
+            # Check if base_env provided or not. If provided, check if it is available in
+            # the list of base envs. If not available, set base_env to the default python base env.
+            if not base_env or \
+                base_env.lower() not in list_base_envs()['base_name'].str.lower().to_list():
+                # Print warning message if base_env provided is not available.
+                if base_env:
+                    print(f"Note: The specified base environment '{base_env}' is unavailable. "\
+                          "Using the default base environment as specified in the documentation.")
+                # Set base_env to the default
+                base_env = __get_default_base_env()
     if not desc:
         desc = "This env '{}' is created with base env '{}'.".format(env_name, base_env)
     try:
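create_env() now resolves base_env only when a UES URL and a connection are available: conda environments keep a user-supplied base and otherwise take the default, while non-conda requests fall back to the default whenever the name is missing or not in the catalog. A compact, hypothetical sketch of that decision flow using plain arguments instead of the real teradataml helpers:

def resolve_base_env(base_env, conda_env, available_bases, default_base):
    """Mirror of the fallback logic: return the base environment name to use."""
    if conda_env:
        # Conda environments only fall back when nothing was requested.
        return base_env if base_env is not None else default_base
    # Non-conda: fall back when the request is missing or not in the catalog.
    if not base_env or base_env.lower() not in [b.lower() for b in available_bases]:
        if base_env:
            print(f"Note: base environment '{base_env}' is unavailable; using the default.")
        return default_base
    return base_env


# Example: an unknown base name falls back to the default.
print(resolve_base_env("python_9.9", False, ["python_3.9", "python_3.10"], "python_3.10"))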
@@ -1559,7 +1599,6 @@ def get_user_env():
 
 
 @collect_queryband(queryband="StAthTkn")
-@argument_deprecation("20.00.00.04", "ues_url", False, "base_url")
 def set_auth_token(base_url=None, client_id=None, pat_token=None, pem_file=None, **kwargs):
     """
     DESCRIPTION:
@@ -1568,17 +1607,11 @@ def set_auth_token(base_url=None, client_id=None, pat_token=None, pem_file=None,
     Note:
        User must have a privilege to login with a NULL password to use set_auth_token().
        Please refer to GRANT LOGON section in Teradata Documentation for more details.
-       If
-       If
+       If base_url and client_id are specified then authentication is through OAuth.
+       If base_url, pat_token, pem_file are specified then authentication is through PAT.
        Refresh token still works but only for OAuth authentication.
 
    PARAMETERS:
-
-       ues_url:
-           Required Argument.
-           Specifies the URL for User Environment Service in VantageCloud Lake.
-           Types: str
-
        base_url:
            Required Argument.
            Specifies the CCP endpoint URL.
@@ -1769,5 +1802,9 @@ def set_auth_token(base_url=None, client_id=None, pat_token=None, pem_file=None,
         token_data = auth_wf._proxy_jwt()
         # Store the jwt token in internal class attribute.
         _InternalBuffer.add(auth_token=_AuthToken(token=token_data))
+    # If set_auth_token is triggered then it will be ccp_enabled = True.
+    # The function returns if we have just passed the auth_token, thus
+    # having ccp_enabled = False.
+    configure._ccp_enabled = True
 
     return True
teradataml/store/__init__.py
CHANGED
@@ -9,5 +9,4 @@ This file imports components from Feature Store and Vector Store.
 """
 
 from teradataml.store.feature_store.feature_store import FeatureStore
-from teradataml.store.feature_store.models import *
-from teradataml.store.vector_store import *
+from teradataml.store.feature_store.models import *
teradataml/store/feature_store/feature_store.py
CHANGED
@@ -66,8 +66,7 @@ class FeatureStore:
         self.__df_container = {}
 
         # Store the table names here. Then use this where ever required.
-        self.__table_names =
-        for name, table_name in EFS_TABLES.items()}
+        self.__table_names = EFS_TABLES
 
         # Declare getter's for getting the corresponding DataFrame's.
         self.__get_features_df = lambda : self.__get_obj_df("feature")
@@ -1560,6 +1559,7 @@ class FeatureStore:
         table_name = self.__table_names["{}_staging".format(type_)]
 
         res = _delete_data(table_name=table_name,
+                           schema_name=self.__repo,
                            delete_conditions=(Col("name") == name)
                            )
 
@@ -1606,11 +1606,13 @@ class FeatureStore:
 
         # remove it from xref table first.
         _delete_data(table_name=ent_table_xref,
+                     schema_name=self.__repo,
                      delete_conditions=(Col("entity_name") == name)
                      )
 
         # remove from entity table.
         res = _delete_data(table_name=ent_table,
+                           schema_name=self.__repo,
                            delete_conditions=(Col("name") == name)
                            )
 
@@ -1758,6 +1760,96 @@ class FeatureStore:
         """
         return self.__remove_obj(name=feature, type_="feature")
 
+    def delete(self):
+        """
+        DESCRIPTION:
+            Removes the FeatureStore and its components from repository.
+            Notes:
+                 * The function removes all the associated database objects along with data.
+                   Be cautious while using this function.
+                 * The function tries to remove the underlying Database also once
+                   all the Feature Store objects are removed.
+                 * The user must have permission on the database used by this Feature Store
+                     * to drop triggers.
+                     * to drop the tables.
+                     * to drop the Database.
+                 * If the user lacks any of the mentioned permissions, Teradata recommends
+                   to not use this function.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            bool.
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            # Setup FeatureStore for repo 'vfs_v1'.
+            >>> from teradataml import FeatureStore
+            >>> fs = FeatureStore("vfs_v1")
+            >>> fs.setup()
+            True
+            >>> # Delete FeatureStore.
+            >>> fs.delete()
+            True
+            >>>
+        """
+        confirmation = input("The function removes Feature Store and drops the "
+                             "corresponding repo also. Are you sure you want to proceed? (Y/N): ")
+
+        if confirmation in ["Y", "y"]:
+            return self.__drop_feature_store_objects(self.__repo)
+
+        return False
+
+    @staticmethod
+    def __drop_feature_store_objects(repo_name):
+        """
+        DESCRIPTION:
+            Removes the FeatureStore and it's components from repository.
+
+        PARAMETERS:
+            repo_name:
+                Required Argument.
+                Specifies the name of the repository.
+                Types: str
+
+        RETURNS:
+            bool
+        """
+        # Drop all the tables and staging tables.
+        tables_ = [
+            EFS_GROUP_FEATURES_SPEC["table_name"],
+            EFS_FEATURE_GROUP_SPEC["table_name"],
+            EFS_FEATURES_SPEC['table_name'],
+            EFS_ENTITY_XREF_SPEC['table_name'],
+            EFS_ENTITY_SPEC["table_name"],
+            EFS_DATA_SOURCE_SPEC["table_name"]
+        ]
+
+        tables_stg_ = [
+            EFS_FEATURES_STAGING_SPEC['table_name'],
+            EFS_ENTITY_STAGING_SPEC["table_name"],
+            EFS_ENTITY_XREF_STAGING_SPEC["table_name"],
+            EFS_DATA_SOURCE_STAGING_SPEC["table_name"],
+            EFS_FEATURE_GROUP_STAGING_SPEC["table_name"],
+            EFS_GROUP_FEATURES_STAGING_SPEC["table_name"]
+        ]
+
+        # Drop all the triggers first. So that tables can be dropped.
+        triggers = ["{}_trg".format(table) for table in tables_]
+        for trigger in triggers:
+            execute_sql("drop trigger {}.{}".format(repo_name, trigger))
+
+        for table in (tables_ + [EFS_VERSION_SPEC["table_name"]] + tables_stg_):
+            db_drop_table(table, schema_name=repo_name)
+
+        execute_sql("DROP DATABASE {}".format(repo_name))
+
+        return True
+
     def delete_feature(self, feature):
         """
         DESCRIPTION:
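Usage of the new delete() method is sketched below; the session transcript is illustrative, pieced together from the docstring example and the confirmation prompt shown in this hunk, not captured output. Note the teardown order in __drop_feature_store_objects: triggers are dropped before the tables they reference, and the repository database is dropped last.

>>> from teradataml import FeatureStore
>>> fs = FeatureStore("vfs_v1")
>>> fs.delete()
The function removes Feature Store and drops the corresponding repo also. Are you sure you want to proceed? (Y/N): Y
True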
@@ -2134,10 +2226,12 @@ class FeatureStore:
 
         # Remove data for FeatureGroup.
         _delete_data(table_name=self.__table_names["group_features_staging"],
+                     schema_name=self.__repo,
                      delete_conditions=(Col("group_name") == fg_name)
                      )
 
         res = _delete_data(table_name=self.__table_names["feature_group_staging"],
+                           schema_name=self.__repo,
                            delete_conditions=(Col("name") == fg_name)
                            )
 
@@ -2175,25 +2269,26 @@ class FeatureStore:
             fs.__get_features_df()
         """
         if obj_type not in self.__df_container:
+            from teradataml.dataframe.dataframe import in_schema
 
             # For feature or feature_staging, join it with xref table
             # so group name appears while listing features.
             map_ = {"feature": "group_features", "feature_staging": "group_features_staging"}
             if obj_type in map_:
-                features = DataFrame(self.__table_names[obj_type])
-                features_xref = DataFrame(self.__table_names[map_[obj_type]]).select(
+                features = DataFrame(in_schema(self.__repo, self.__table_names[obj_type]))
+                features_xref = DataFrame(in_schema(self.__repo, self.__table_names[map_[obj_type]])).select(
                     ["feature_name", "group_name"])
                 df = features.join(features_xref, on="name==feature_name", how='left')
                 self.__df_container[obj_type] = df.select(features.columns+["group_name"])
             # For entity, join with xref table.
             elif obj_type == "entity" or obj_type == "entity_staging":
-                ent_df = DataFrame(self.__table_names[obj_type])
-                xref_df = DataFrame(self.__table_names["{}_xref".format(obj_type)]).select(
+                ent_df = DataFrame(in_schema(self.__repo, self.__table_names[obj_type]))
+                xref_df = DataFrame(in_schema(self.__repo, self.__table_names["{}_xref".format(obj_type)])).select(
                     ['entity_name', 'entity_column'])
                 df = ent_df.join(xref_df, on="name==entity_name", how="inner")
                 self.__df_container[obj_type] = df.select(ent_df.columns+["entity_column"])
             else:
-                self.__df_container[obj_type] = DataFrame(self.__table_names[obj_type])
+                self.__df_container[obj_type] = DataFrame(in_schema(self.__repo, self.__table_names[obj_type]))
 
         return self.__df_container[obj_type]
 
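The __get_obj_df() changes above qualify every Feature Store table with the repository database through in_schema() rather than relying on the session default. A short usage sketch, assuming an active teradataml connection; the database and table names are placeholders.

from teradataml.dataframe.dataframe import DataFrame, in_schema

# Build a DataFrame on repo_db.features instead of <default_db>.features.
features = DataFrame(in_schema("repo_db", "features"))
print(features.columns)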
teradataml/table_operators/Apply.py
CHANGED
@@ -316,14 +316,6 @@ class Apply(TableOperator):
                          is_local_order,
                          sort_ascending,
                          nulls_first)
-
-        # Set the variable specific to this child class.
-        self.apply_command = apply_command
-        self.env_name = env_name if env_name is not None else get_user_env()
-        self.style = style
-        self.returns = returns
-        self._skip_argument_validation = False
-
         # Create AnalyticsWrapperUtils instance which contains validation functions.
         # This is required for is_default_or_not check.
         # Rest all validation is done using _Validators
@@ -332,20 +324,42 @@ class Apply(TableOperator):
         # Perform argument validation for arguments specific to this class.
         self.__arg_info_matrix = []
 
-        self.__arg_info_matrix.append(["style",
-        self.__arg_info_matrix.append(["env_name",
-        self.__arg_info_matrix.append(["apply_command",
-        self.__arg_info_matrix.append(["returns",
-
+        self.__arg_info_matrix.append(["style", style, True, (str), True, ['CSV']])
+        self.__arg_info_matrix.append(["env_name", env_name, False, (str, UserEnv), True])
+        self.__arg_info_matrix.append(["apply_command", apply_command, False, (str), True])
+        self.__arg_info_matrix.append(["returns", returns, True, (dict), True])
+        self._skip_argument_validation = False
         # Perform the function argument validations.
         self.__apply__validate()
 
-
+        # If user do not pass environment, get the default environment.
+        if env_name is None:
+            env_name = get_user_env()
+        self._open_af_env = env_name
+
+        # Set the variable specific to this child class.
+        self.apply_command = apply_command
+        self.env_name = env_name if isinstance(env_name, str) else env_name.env_name
+        self.style = style
+        self.returns = returns
+
+
+    @property
+    def env(self):
+        """
+        DESCRIPTION:
+            Getter to get environment.
+
+        RETURNS:
+            bool
+
+        RAISES:
+            None
+        """
+        if isinstance(self._open_af_env, str):
+            self._open_af_env = get_env(self._open_af_env)
 
-
-        # remote user environment name as string.
-        if isinstance(self.env_name, UserEnv):
-            self.env_name = self.env_name.env_name
+        return self._open_af_env
 
     @property
     def skip_argument_validation(self):
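Apply now keeps the requested environment (name or UserEnv) in _open_af_env and resolves it through get_env() only when the new env property is first read. A generic sketch of that lazy-resolution property; the class and the resolve callable below are illustrative stand-ins, not teradataml API.

class LazyEnvHolder:
    def __init__(self, env, resolve):
        self._env = env          # either a name (str) or an already-resolved object
        self._resolve = resolve  # stand-in for teradataml's get_env()

    @property
    def env(self):
        # Resolve the name on first access and cache the resolved object.
        if isinstance(self._env, str):
            self._env = self._resolve(self._env)
        return self._env


holder = LazyEnvHolder("my_env", resolve=lambda name: {"env_name": name})
print(holder.env)  # resolved on first access
print(holder.env)  # reused on later accesses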
teradataml/table_operators/Script.py
CHANGED
@@ -1701,7 +1701,9 @@ class Script(TableOperator):
                                                              gc_on_quit=True, quote=False,
                                                              table_type=table_type)
         try:
-            if
+            if configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE:
+                UtilFuncs._create_table(tblop_stdout_temp_tablename, self._tblop_query, volatile=True)
+            elif output_style == OutputStyle.OUTPUT_TABLE.value:
                 UtilFuncs._create_table(tblop_stdout_temp_tablename, self._tblop_query)
             else:
                 UtilFuncs._create_view(tblop_stdout_temp_tablename, self._tblop_query)
teradataml/table_operators/TableOperator.py
CHANGED
@@ -458,7 +458,9 @@ class TableOperator:
                                                              )
 
         try:
-            if
+            if configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE:
+                UtilFuncs._create_table(tblop_stdout_temp_tablename, self._tblop_query, volatile=True)
+            elif output_style == OutputStyle.OUTPUT_TABLE.value:
                 UtilFuncs._create_table(tblop_stdout_temp_tablename, self._tblop_query)
             else:
                 UtilFuncs._create_view(tblop_stdout_temp_tablename, self._tblop_query)
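Both the Script and TableOperator hunks add the same routing: when configure.temp_object_type selects volatile tables, the stdout result is created as a volatile table; otherwise the existing table-or-view choice applies. A schematic sketch of that three-way branch with plain stand-ins for the teradataml constants and UtilFuncs:

def materialize(name, query, temp_object_type, output_style,
                create_table, create_view):
    """Route the query result to a volatile table, a table, or a view."""
    if temp_object_type == "VOLATILE":    # stands in for TeradataConstants.TERADATA_VOLATILE_TABLE
        create_table(name, query, volatile=True)
    elif output_style == "TABLE":         # stands in for OutputStyle.OUTPUT_TABLE.value
        create_table(name, query)
    else:
        create_view(name, query)


materialize("t1", "SELECT 1", "VOLATILE", "VIEW",
            create_table=lambda n, q, volatile=False: print("table", n, "volatile" if volatile else ""),
            create_view=lambda n, q: print("view", n))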
teradataml/utils/dtypes.py
CHANGED
@@ -293,10 +293,57 @@ class _DtypesMappers:
         VARCHAR: lambda x: "{0},{1}".format(x.__class__.__name__, x.length)
     }
 
+    # Holds mapping between string representation of teradatasqlalchemy type
+    # and actual teradatasqlalchemy type.
+    DATALAKE_STR_to_TDSQLALCHEMY_DATATYPE_MAPPER = {
+        "CHAR": CHAR,
+        "VARCHAR": VARCHAR,
+
+        "BYTEINT": BYTEINT,
+        "SMALLINT": SMALLINT,
+        "INTEGER": INTEGER,
+        "BIGINT": BIGINT,
+
+        "REAL": FLOAT,
+        "FLOAT": FLOAT,
+        "DOUBLE": FLOAT,
+        "DECIMAL": DECIMAL,
+        "NUMBER": NUMBER,
+
+        "DATE": DATE,
+        "TIME": TIME,
+        "TIMESTAMP": TIMESTAMP,
+        "TIMESTAMP_WTZ": TIMESTAMP,
+
+        "BYTE": BYTE,
+        "VARBYTE": VARBYTE,
+        "BLOB": BLOB,
+        # TODO: Add CLOB type when support is added from OTF.
+
+        # TODO: Check these types when corresponding data type support
+        # is available from OTF support or not.
+        "INTERVAL_YEAR": INTERVAL_YEAR,
+        "INTERVAL_YTM": INTERVAL_YEAR_TO_MONTH,
+        "INTERVAL_MONTH": INTERVAL_MONTH,
+        "INTERVAL_DAY": INTERVAL_DAY,
+
+        "INTERVAL_DTH": INTERVAL_DAY_TO_HOUR,
+        "INTERVAL_DTM": INTERVAL_DAY_TO_MINUTE,
+        "INTERVAL_DTS": INTERVAL_DAY_TO_SECOND,
+        "INTERVAL_HOUR": INTERVAL_HOUR,
+        "INTERVAL_HTM": INTERVAL_HOUR_TO_MINUTE,
+        "INTERVAL_HTS": INTERVAL_HOUR_TO_SECOND,
+        "INTERVAL_MINUTE": INTERVAL_MINUTE,
+        "INTERVAL_MTS": INTERVAL_MINUTE_TO_SECOND,
+        "INTERVAL_SECOND": INTERVAL_SECOND
+    }
+
+
 class _SuppArgTypes:
     VAL_ARG_DATATYPE = (str, BIGINT, BYTEINT, CHAR, DATE, DECIMAL, FLOAT, INTEGER, NUMBER, SMALLINT, TIME,
                         TIMESTAMP, VARCHAR)
 
+
 class _Dtypes:
 
     @staticmethod
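DATALAKE_STR_to_TDSQLALCHEMY_DATATYPE_MAPPER maps OTF/datalake type names onto teradatasqlalchemy type classes. A reduced lookup sketch is shown below; it assumes teradatasqlalchemy is installed and covers only a handful of the names in the real mapper.

from teradatasqlalchemy.types import INTEGER, VARCHAR, FLOAT

# Reduced illustration of the string-to-type mapping added in this release.
_STR_TO_TYPE = {"INTEGER": INTEGER, "VARCHAR": VARCHAR, "REAL": FLOAT, "DOUBLE": FLOAT}


def to_td_type(name):
    # Unknown names return None here; the real code may handle them differently.
    return _STR_TO_TYPE.get(name.upper())


print(to_td_type("double"))  # prints the FLOAT type class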
teradataml/utils/internal_buffer.py
CHANGED
@@ -82,3 +82,21 @@ class _InternalBuffer:
         """
         if key in cls.__data:
             return cls.__data.get(key)
+
+    @classmethod
+    def remove_key(cls, key):
+        """
+        DESCRIPTION:
+            Remove a particular key from the internal buffer.
+
+        RETURNS:
+            None
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            # Remove all json objects from _InternalBuffer.
+            _InternalBuffer.remove_key("vs_session_id")
+        """
+        del cls.__data[key]
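_InternalBuffer gains a remove_key() classmethod, completing an add/get/remove lifecycle for cached entries; since it delegates to del, the key must already exist. A toy buffer illustrating that lifecycle (an illustration only, not the teradataml implementation):

class Buffer:
    _data = {}

    @classmethod
    def add(cls, **kwargs):
        cls._data.update(kwargs)

    @classmethod
    def get(cls, key):
        return cls._data.get(key)

    @classmethod
    def remove_key(cls, key):
        # Mirrors the new method: raises KeyError when the key is absent.
        del cls._data[key]


Buffer.add(vs_session_id="abc123")
print(Buffer.get("vs_session_id"))   # abc123
Buffer.remove_key("vs_session_id")
print(Buffer.get("vs_session_id"))   # None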
teradataml/utils/validators.py
CHANGED
@@ -286,7 +286,8 @@ class _Validators:
 
     @staticmethod
     @skip_validation()
-    def _validate_dataframe_has_argument_columns(columns, column_arg, data, data_arg, is_partition_arg=False
+    def _validate_dataframe_has_argument_columns(columns, column_arg, data, data_arg, is_partition_arg=False,
+                                                 case_insensitive=False):
         """
         Function to check whether column names in columns are present in given dataframe or not.
         This function is used currently only for Analytics wrappers.
@@ -312,12 +313,19 @@ class _Validators:
                 Specifies the name of the dataframe argument.
                 Types: str
 
-
+            is_partition_arg:
                 Optional Argument.
                 Specifies a bool argument notifying, whether argument being validate is
                 Partition argument or not.
                 Types: bool
 
+            case_insensitive:
+                Optional Argument.
+                Specifies a bool argument notifying, whether to check column names
+                in case-insensitive manner or not.
+                Default Value: False
+                Types: bool
+
         RAISES:
             TeradataMlException - TDMLDF_COLUMN_IN_ARG_NOT_FOUND column(s) does not exist in a dataframe.
 
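Several validator entry points gain a case_insensitive flag that is threaded down to _validate_column_exists_in_dataframe(), as the remaining hunks below show. The essence of such a check is sketched here against a plain list of column names; the real validator works on a DataFrame's metadata expression.

def column_exists(column, df_columns, case_insensitive=False):
    """Return True when `column` is present in `df_columns`."""
    if case_insensitive:
        return column.lower() in (c.lower() for c in df_columns)
    return column in df_columns


cols = ["Accounts", "Txn_Date"]
print(column_exists("accounts", cols))                         # False: exact match only
print(column_exists("accounts", cols, case_insensitive=True))  # True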
@@ -359,7 +367,7 @@ class _Validators:
             try:
                 # Check if its a sinlge column with one separator. For e.g. column:A.
                 # If yes, just continue.
-                _Validators._validate_column_exists_in_dataframe(column, data._metaexpr)
+                _Validators._validate_column_exists_in_dataframe(column, data._metaexpr, case_insensitive=case_insensitive)
                 continue
             except:
                 # User has provided range value.
@@ -382,7 +390,8 @@ class _Validators:
                 total_columns.append(column)
 
         return _Validators._validate_column_exists_in_dataframe(total_columns, data._metaexpr, column_arg=column_arg,
-                                                                data_arg=data_arg)
+                                                                data_arg=data_arg, case_insensitive=case_insensitive)
+
 
     @staticmethod
     @skip_validation()
@@ -1398,7 +1407,8 @@ class _Validators:
 
     @staticmethod
     @skip_validation()
-    def _validate_unexpected_column_type(df, col, col_arg, unexpected_types, check_exist=True, raise_error=True
+    def _validate_unexpected_column_type(df, col, col_arg, unexpected_types, check_exist=True, raise_error=True,
+                                         case_insensitive=False):
         """
         Internal function to validate the column existence and type of an input DataFrame column against
         a list of unexpected types.
@@ -1464,7 +1474,7 @@ class _Validators:
 
         # Check for column existence.
         if check_exist:
-            _Validators._validate_column_exists_in_dataframe(col, df._metaexpr)
+            _Validators._validate_column_exists_in_dataframe(col, df._metaexpr, case_insensitive=case_insensitive)
 
         if isinstance(df[col].type, unexpected_types):
             if raise_error:
@@ -2303,7 +2313,56 @@ class _Validators:
            >>> _Validators._check_auth_token("udf")
         """
         if _InternalBuffer.get("auth_token") is None:
-            raise TeradataMlException(Messages.get_message(MessageCodes.
-                                      func_name
+            raise TeradataMlException(Messages.get_message(MessageCodes.SET_REQUIRED_PARAMS,\
+                                                           'Auth Token', func_name,
+                                                           'set_auth_token'),
+                                      MessageCodes.SET_REQUIRED_PARAMS)
 
-        return True
+        return True
+
+    @staticmethod
+    def _check_required_params(arg_value, arg_name, caller_func_name, target_func_name):
+        """
+        DESCRIPTION:
+            Check if the required argument is not None.
+
+        PARAMETERS:
+            arg_value:
+                Required Argument.
+                Specifies the argument value to be
+                checked for non None values.
+                Types: str, float, int, bool
+
+            arg_name:
+                Required Argument.
+                Specifies the argument name.
+                Types: str
+
+            caller_func_name:
+                Required Argument.
+                Specifies the function name which calls this function.
+                This is required for the error message.
+                Types: str
+
+            target_func_name:
+                Required Argument.
+                Specifies the function name which the user needs to call
+                so that the error is fixed.
+                This is required for the error message.
+                Types: str
+
+        RAISES:
+            TeradataMLException
+
+        RETURNS:
+            True.
+
+        EXAMPLES:
+            >>> _Validators._check_required_params("udf", "arg_name")
+        """
+        if arg_value is None:
+            raise TeradataMlException(Messages.get_message(MessageCodes.SET_REQUIRED_PARAMS, \
+                                                           arg_name, caller_func_name,
+                                                           target_func_name),
+                                      MessageCodes.SET_REQUIRED_PARAMS)
+        return True