teradataml 20.0.0.3__py3-none-any.whl → 20.0.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Note: this version of teradataml has been flagged as a potentially problematic release.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +193 -1
- teradataml/__init__.py +2 -1
- teradataml/_version.py +2 -2
- teradataml/analytics/analytic_function_executor.py +25 -18
- teradataml/analytics/byom/__init__.py +1 -1
- teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
- teradataml/analytics/sqle/__init__.py +20 -2
- teradataml/analytics/utils.py +15 -1
- teradataml/analytics/valib.py +18 -4
- teradataml/automl/__init__.py +341 -112
- teradataml/automl/autodataprep/__init__.py +471 -0
- teradataml/automl/data_preparation.py +84 -42
- teradataml/automl/data_transformation.py +69 -33
- teradataml/automl/feature_engineering.py +76 -9
- teradataml/automl/feature_exploration.py +639 -25
- teradataml/automl/model_training.py +35 -14
- teradataml/clients/auth_client.py +2 -2
- teradataml/common/__init__.py +1 -2
- teradataml/common/constants.py +122 -63
- teradataml/common/messagecodes.py +14 -3
- teradataml/common/messages.py +8 -4
- teradataml/common/sqlbundle.py +40 -10
- teradataml/common/utils.py +366 -74
- teradataml/common/warnings.py +11 -0
- teradataml/context/context.py +348 -86
- teradataml/data/amazon_reviews_25.csv +26 -0
- teradataml/data/apriori_example.json +22 -0
- teradataml/data/byom_example.json +11 -0
- teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
- teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
- teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
- teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
- teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
- teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
- teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
- teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
- teradataml/data/docs/sqle/docs_17_20/Shap.py +28 -6
- teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
- teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
- teradataml/data/hnsw_alter_data.csv +5 -0
- teradataml/data/hnsw_data.csv +10 -0
- teradataml/data/jsons/byom/h2opredict.json +1 -1
- teradataml/data/jsons/byom/onnxembeddings.json +266 -0
- teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
- teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
- teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
- teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
- teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
- teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
- teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
- teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
- teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +2 -2
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +3 -3
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +6 -6
- teradataml/data/ner_dict.csv +8 -0
- teradataml/data/ner_input_eng.csv +7 -0
- teradataml/data/ner_rule.csv +5 -0
- teradataml/data/pos_input.csv +40 -0
- teradataml/data/tdnerextractor_example.json +14 -0
- teradataml/data/teradataml_example.json +21 -0
- teradataml/data/textmorph_example.json +5 -0
- teradataml/data/to_num_data.csv +4 -0
- teradataml/data/tochar_data.csv +5 -0
- teradataml/data/trans_dense.csv +16 -0
- teradataml/data/trans_sparse.csv +55 -0
- teradataml/data/vectordistance_example.json +1 -1
- teradataml/dataframe/copy_to.py +45 -29
- teradataml/dataframe/data_transfer.py +72 -46
- teradataml/dataframe/dataframe.py +642 -166
- teradataml/dataframe/dataframe_utils.py +167 -22
- teradataml/dataframe/functions.py +135 -20
- teradataml/dataframe/setop.py +11 -6
- teradataml/dataframe/sql.py +330 -78
- teradataml/dbutils/dbutils.py +556 -140
- teradataml/dbutils/filemgr.py +14 -10
- teradataml/hyperparameter_tuner/optimizer.py +12 -1
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/{sklearn/_sklearn_wrapper.py → _base.py} +168 -1013
- teradataml/opensource/_class.py +141 -17
- teradataml/opensource/{constants.py → _constants.py} +7 -3
- teradataml/opensource/_lightgbm.py +52 -53
- teradataml/opensource/_sklearn.py +1008 -0
- teradataml/opensource/_wrapper_utils.py +5 -5
- teradataml/options/__init__.py +47 -15
- teradataml/options/configure.py +103 -26
- teradataml/options/display.py +13 -2
- teradataml/plot/axis.py +47 -8
- teradataml/plot/figure.py +33 -0
- teradataml/plot/plot.py +63 -13
- teradataml/scriptmgmt/UserEnv.py +307 -40
- teradataml/scriptmgmt/lls_utils.py +428 -145
- teradataml/store/__init__.py +2 -3
- teradataml/store/feature_store/feature_store.py +102 -7
- teradataml/table_operators/Apply.py +48 -19
- teradataml/table_operators/Script.py +23 -2
- teradataml/table_operators/TableOperator.py +3 -1
- teradataml/table_operators/table_operator_util.py +58 -9
- teradataml/utils/dtypes.py +49 -1
- teradataml/utils/internal_buffer.py +38 -0
- teradataml/utils/validators.py +377 -62
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/METADATA +200 -4
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/RECORD +146 -112
- teradataml/data/SQL_Fundamentals.pdf +0 -0
- teradataml/libaed_0_1.dylib +0 -0
- teradataml/libaed_0_1.so +0 -0
- teradataml/opensource/sklearn/__init__.py +0 -0
- teradataml/store/vector_store/__init__.py +0 -1586
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/zip-safe +0 -0
teradataml/store/__init__.py
CHANGED
@@ -5,9 +5,8 @@ TERADATA CORPORATION CONFIDENTIAL AND TRADE SECRET
 Primary Owner: pradeep.garre@teradata.com
 Secondary Owner: aanchal.kavedia@teradata.com

-This file imports components from Feature Store
+This file imports components from Feature Store.
 """

 from teradataml.store.feature_store.feature_store import FeatureStore
-from teradataml.store.feature_store.models import *
-from teradataml.store.vector_store import *
+from teradataml.store.feature_store.models import *
teradataml/store/feature_store/feature_store.py
CHANGED

@@ -66,8 +66,7 @@ class FeatureStore:
         self.__df_container = {}

         # Store the table names here. Then use this where ever required.
-        self.__table_names =
-                             for name, table_name in EFS_TABLES.items()}
+        self.__table_names = EFS_TABLES

         # Declare getter's for getting the corresponding DataFrame's.
         self.__get_features_df = lambda : self.__get_obj_df("feature")
@@ -1560,6 +1559,7 @@ class FeatureStore:
         table_name = self.__table_names["{}_staging".format(type_)]

         res = _delete_data(table_name=table_name,
+                           schema_name=self.__repo,
                            delete_conditions=(Col("name") == name)
                            )

@@ -1606,11 +1606,13 @@ class FeatureStore:

         # remove it from xref table first.
         _delete_data(table_name=ent_table_xref,
+                     schema_name=self.__repo,
                      delete_conditions=(Col("entity_name") == name)
                      )

         # remove from entity table.
         res = _delete_data(table_name=ent_table,
+                           schema_name=self.__repo,
                            delete_conditions=(Col("name") == name)
                            )

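The recurring `schema_name=self.__repo` additions in this and the previous hunk fix one class of bug: the generated DELETE previously resolved against the session's default database rather than the Feature Store repo. A minimal illustration of the difference, with placeholder repo and table names:

```python
# Placeholder names; the package builds these statements internally.
repo, table = "vfs_v1", "efs_entity_staging"

# Before: unqualified, runs against whatever the session default database is.
before = f"DELETE FROM {table} WHERE name = 'my_entity'"

# After: pinned to the Feature Store repo via the schema qualifier.
after = f"DELETE FROM {repo}.{table} WHERE name = 'my_entity'"
```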
@@ -1758,6 +1760,96 @@ class FeatureStore:
         """
         return self.__remove_obj(name=feature, type_="feature")

+    def delete(self):
+        """
+        DESCRIPTION:
+            Removes the FeatureStore and its components from repository.
+            Notes:
+                * The function removes all the associated database objects along with data.
+                  Be cautious while using this function.
+                * The function tries to remove the underlying Database also once
+                  all the Feature Store objects are removed.
+                * The user must have permission on the database used by this Feature Store
+                    * to drop triggers.
+                    * to drop the tables.
+                    * to drop the Database.
+                * If the user lacks any of the mentioned permissions, Teradata recommends
+                  to not use this function.
+
+        PARAMETERS:
+            None
+
+        RETURNS:
+            bool.
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            # Setup FeatureStore for repo 'vfs_v1'.
+            >>> from teradataml import FeatureStore
+            >>> fs = FeatureStore("vfs_v1")
+            >>> fs.setup()
+            True
+            >>> # Delete FeatureStore.
+            >>> fs.delete()
+            True
+            >>>
+        """
+        confirmation = input("The function removes Feature Store and drops the "
+                             "corresponding repo also. Are you sure you want to proceed? (Y/N): ")
+
+        if confirmation in ["Y", "y"]:
+            return self.__drop_feature_store_objects(self.__repo)
+
+        return False
+
+    @staticmethod
+    def __drop_feature_store_objects(repo_name):
+        """
+        DESCRIPTION:
+            Removes the FeatureStore and it's components from repository.
+
+        PARAMETERS:
+            repo_name:
+                Required Argument.
+                Specifies the name of the repository.
+                Types: str
+
+        RETURNS:
+            bool
+        """
+        # Drop all the tables and staging tables.
+        tables_ = [
+            EFS_GROUP_FEATURES_SPEC["table_name"],
+            EFS_FEATURE_GROUP_SPEC["table_name"],
+            EFS_FEATURES_SPEC['table_name'],
+            EFS_ENTITY_XREF_SPEC['table_name'],
+            EFS_ENTITY_SPEC["table_name"],
+            EFS_DATA_SOURCE_SPEC["table_name"]
+        ]
+
+        tables_stg_ = [
+            EFS_FEATURES_STAGING_SPEC['table_name'],
+            EFS_ENTITY_STAGING_SPEC["table_name"],
+            EFS_ENTITY_XREF_STAGING_SPEC["table_name"],
+            EFS_DATA_SOURCE_STAGING_SPEC["table_name"],
+            EFS_FEATURE_GROUP_STAGING_SPEC["table_name"],
+            EFS_GROUP_FEATURES_STAGING_SPEC["table_name"]
+        ]
+
+        # Drop all the triggers first. So that tables can be dropped.
+        triggers = ["{}_trg".format(table) for table in tables_]
+        for trigger in triggers:
+            execute_sql("drop trigger {}.{}".format(repo_name, trigger))
+
+        for table in (tables_ + [EFS_VERSION_SPEC["table_name"]] + tables_stg_):
+            db_drop_table(table, schema_name=repo_name)
+
+        execute_sql("DROP DATABASE {}".format(repo_name))
+
+        return True
+
     def delete_feature(self, feature):
         """
         DESCRIPTION:
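One practical note on the new `delete()`: it confirms via `input()`, so it blocks in non-interactive jobs. A hedged sketch of auto-answering the prompt from a script or test by patching `builtins.input`; the patching approach is a workaround, not something the package provides:

```python
import builtins
from unittest import mock
from teradataml import FeatureStore

fs = FeatureStore("vfs_v1")   # assumes an active Vantage connection and repo

# Auto-answer the Y/N confirmation that delete() reads via input().
with mock.patch.object(builtins, "input", return_value="Y"):
    dropped = fs.delete()
print(dropped)                # True once all objects and the repo are dropped
```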
@@ -2134,10 +2226,12 @@ class FeatureStore:

         # Remove data for FeatureGroup.
         _delete_data(table_name=self.__table_names["group_features_staging"],
+                     schema_name=self.__repo,
                      delete_conditions=(Col("group_name") == fg_name)
                      )

         res = _delete_data(table_name=self.__table_names["feature_group_staging"],
+                           schema_name=self.__repo,
                            delete_conditions=(Col("name") == fg_name)
                            )

@@ -2175,25 +2269,26 @@ class FeatureStore:
             fs.__get_features_df()
         """
         if obj_type not in self.__df_container:
+            from teradataml.dataframe.dataframe import in_schema

             # For feature or feature_staging, join it with xref table
             # so group name appears while listing features.
             map_ = {"feature": "group_features", "feature_staging": "group_features_staging"}
             if obj_type in map_:
-                features = DataFrame(self.__table_names[obj_type])
-                features_xref = DataFrame(self.__table_names[map_[obj_type]]).select(
+                features = DataFrame(in_schema(self.__repo, self.__table_names[obj_type]))
+                features_xref = DataFrame(in_schema(self.__repo, self.__table_names[map_[obj_type]])).select(
                     ["feature_name", "group_name"])
                 df = features.join(features_xref, on="name==feature_name", how='left')
                 self.__df_container[obj_type] = df.select(features.columns+["group_name"])
             # For entity, join with xref table.
             elif obj_type == "entity" or obj_type == "entity_staging":
-                ent_df = DataFrame(self.__table_names[obj_type])
-                xref_df = DataFrame(self.__table_names["{}_xref".format(obj_type)]).select(
+                ent_df = DataFrame(in_schema(self.__repo, self.__table_names[obj_type]))
+                xref_df = DataFrame(in_schema(self.__repo, self.__table_names["{}_xref".format(obj_type)])).select(
                     ['entity_name', 'entity_column'])
                 df = ent_df.join(xref_df, on="name==entity_name", how="inner")
                 self.__df_container[obj_type] = df.select(ent_df.columns+["entity_column"])
             else:
-                self.__df_container[obj_type] = DataFrame(self.__table_names[obj_type])
+                self.__df_container[obj_type] = DataFrame(in_schema(self.__repo, self.__table_names[obj_type]))

         return self.__df_container[obj_type]

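This hunk switches every internal table lookup from a bare name to `in_schema()`, so the DataFrame resolves against the Feature Store repo instead of the session's default database. A minimal sketch of the pattern; `in_schema` and `DataFrame` are the package's public API, while the repo and table values below are hypothetical:

```python
from teradataml import DataFrame, in_schema

repo = "vfs_v1"        # hypothetical Feature Store repo (database)
tbl = "efs_features"   # hypothetical table name

# Before: DataFrame("efs_features") resolved against the current default
# database. After: in_schema() pins the lookup to the repo's database.
df = DataFrame(in_schema(repo, tbl))
```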
teradataml/table_operators/Apply.py
CHANGED

@@ -51,7 +51,8 @@ class Apply(TableOperator):
                  data_order_column=None,
                  is_local_order=False,
                  sort_ascending=True,
-                 nulls_first=True):
+                 nulls_first=True,
+                 **kwargs):
         """
         DESCRIPTION:
             The fastpath Apply table operator executes a user-installed script or
@@ -316,14 +317,6 @@ class Apply(TableOperator):
                          is_local_order,
                          sort_ascending,
                          nulls_first)
-
-        # Set the variable specific to this child class.
-        self.apply_command = apply_command
-        self.env_name = env_name if env_name is not None else get_user_env()
-        self.style = style
-        self.returns = returns
-        self._skip_argument_validation = False
-
         # Create AnalyticsWrapperUtils instance which contains validation functions.
         # This is required for is_default_or_not check.
         # Rest all validation is done using _Validators
@@ -332,20 +325,56 @@ class Apply(TableOperator):
         # Perform argument validation for arguments specific to this class.
         self.__arg_info_matrix = []

-        self.__arg_info_matrix.append(["style",
-        self.__arg_info_matrix.append(["env_name",
-        self.__arg_info_matrix.append(["apply_command",
-        self.__arg_info_matrix.append(["returns",
-
+        self.__arg_info_matrix.append(["style", style, True, (str), True, ['CSV']])
+        self.__arg_info_matrix.append(["env_name", env_name, False, (str, UserEnv), True])
+        self.__arg_info_matrix.append(["apply_command", apply_command, False, (str), True])
+        self.__arg_info_matrix.append(["returns", returns, True, (dict), True])
+        self._skip_argument_validation = False
         # Perform the function argument validations.
         self.__apply__validate()

-
+        # If user do not pass environment, get the default environment.
+        if env_name is None:
+            env_name = get_user_env()
+        self._open_af_env = env_name
+
+        # Set the variable specific to this child class.
+        self.apply_command = apply_command
+        self.env_name = env_name if isinstance(env_name, str) else env_name.env_name
+        self.style = style
+        self.returns = returns
+
+        # Internal variable to check if validation is required for Python and python package versions mismatch.
+        _validation_required = kwargs.pop('_validate_version', False)
+        # Interval variable to store the function name for which validation is required.
+        _func_name = kwargs.pop('_func_name', None)
+        # Internal variable to store the list of packages required for the function.
+        _packages = kwargs.pop('_packages', None)
+
+        # Check if validation for Python and python package versions mismatch is required.
+        if _validation_required:
+            # Check if the Python interpreter major versions are consistent between Vantage and local.
+            UtilFuncs._check_python_version_diff(self.env_name)
+            # Check if the package versions are consistent between Vantage and local.
+            UtilFuncs._check_package_version_diff(_func_name, _packages, self.env_name)
+
+
+    @property
+    def env(self):
+        """
+        DESCRIPTION:
+            Getter to get environment.
+
+        RETURNS:
+            bool
+
+        RAISES:
+            None
+        """
+        if isinstance(self._open_af_env, str):
+            self._open_af_env = get_env(self._open_af_env)

-
-        # remote user environment name as string.
-        if isinstance(self.env_name, UserEnv):
-            self.env_name = self.env_name.env_name
+        return self._open_af_env

     @property
     def skip_argument_validation(self):
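The rewritten constructor plus the new `env` property implement resolve-on-first-access caching: the constructor stores whatever was passed (a name string or a `UserEnv`), and `env` swaps a bare name for the resolved object via `get_env()` only when first read. A self-contained sketch of the pattern; `LazyEnv` and `Resolved` are illustrative stand-ins, not package classes:

```python
# A self-contained sketch of the lazy-resolve-and-cache pattern the new
# Apply.env property uses.
class Resolved:
    def __init__(self, name):
        self.env_name = name

class LazyEnv:
    def __init__(self, env_name):
        self._env = env_name                 # str name or Resolved object

    @property
    def env(self):
        if isinstance(self._env, str):       # first access: resolve once
            self._env = Resolved(self._env)  # the package uses get_env() here
        return self._env                     # cached thereafter

e = LazyEnv("openml_env")
assert e.env is e.env                        # resolved object is cached
```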
teradataml/table_operators/Script.py
CHANGED

@@ -67,7 +67,8 @@ class Script(TableOperator):
                  data_order_column=None,
                  is_local_order=False,
                  sort_ascending=True,
-                 nulls_first=True):
+                 nulls_first=True,
+                 **kwargs):
         """
         DESCRIPTION:
             The Script table operator function executes a user-installed script or
@@ -354,6 +355,24 @@ class Script(TableOperator):
         # Perform the function validations
         self.__validate()

+        # Add the prefix OPENBLAS_NUM_THREADS to the script command.
+        self.script_command = f"{TableOperatorConstants.OPENBLAS_NUM_THREADS.value} {self.script_command}"
+
+        # Internal variable to check if validation is required for Python and python package versions mismatch.
+        _validation_required = kwargs.pop('_validate_version', False)
+        # Interval variable to store the function name for which validation is required.
+        _func_name = kwargs.pop('_func_name', None)
+        # Internal variable to store the list of packages required for the function.
+        _packages = kwargs.pop('_packages', None)
+
+        # Check if validation for Python and python package versions mismatch is required.
+        if _validation_required:
+            # Check if the Python interpreter major versions are consistent between Vantage and local.
+            UtilFuncs._check_python_version_diff()
+            # Check if the package versions are consistent between Vantage and local.
+            UtilFuncs._check_package_version_diff(_func_name, _packages)
+
+
     @property
     def skip_argument_validation(self):
         """
@@ -1701,7 +1720,9 @@ class Script(TableOperator):
                                                   gc_on_quit=True, quote=False,
                                                   table_type=table_type)
         try:
-            if output_style == OutputStyle.OUTPUT_TABLE.value:
+            if configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE:
+                UtilFuncs._create_table(tblop_stdout_temp_tablename, self._tblop_query, volatile=True)
+            elif output_style == OutputStyle.OUTPUT_TABLE.value:
                 UtilFuncs._create_table(tblop_stdout_temp_tablename, self._tblop_query)
             else:
                 UtilFuncs._create_view(tblop_stdout_temp_tablename, self._tblop_query)
teradataml/table_operators/TableOperator.py
CHANGED

@@ -458,7 +458,9 @@ class TableOperator:
                                                   )

         try:
-            if output_style == OutputStyle.OUTPUT_TABLE.value:
+            if configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE:
+                UtilFuncs._create_table(tblop_stdout_temp_tablename, self._tblop_query, volatile=True)
+            elif output_style == OutputStyle.OUTPUT_TABLE.value:
                 UtilFuncs._create_table(tblop_stdout_temp_tablename, self._tblop_query)
             else:
                 UtilFuncs._create_view(tblop_stdout_temp_tablename, self._tblop_query)
teradataml/table_operators/table_operator_util.py
CHANGED

@@ -249,6 +249,9 @@ class _TableOperatorUtils:
         self.data_partition_column = data_partition_column
         self.data_hash_column = data_hash_column
         self.__env = None
+        self.debug = kwargs.pop('debug', False)
+        self._validate_version = False
+        self.packages = []
         # Add all entries from kwargs as class attributes.
         self.__dict__.update(kwargs)

@@ -464,7 +467,10 @@ class _TableOperatorUtils:
         # Remove local copy of file to free up the disk space immediately.
         # Garbage collection will take care of it as a failsafe.
         # We may end up here after the script was created, but even before executing it.
-        GarbageCollector._delete_local_file(self.script_path)
+        if not self.debug:
+            GarbageCollector._delete_local_file(self.script_path)
+        elif not self.__dict__.get('is_printed', False):
+            self._print_script_path()

     def __get_script_name(self):
         """
@@ -486,7 +492,7 @@ class _TableOperatorUtils:
             script_entry, script_alias, script_name, script_base_name = self.__get_script_name()
         """
         script_entry = UtilFuncs._generate_temp_script_name(prefix="" if self.operation is None else self.operation,
-                                                            extension="py")
+                                                            extension="py", gc_on_quit=(self.debug^True))
         # script_alias is the file ID.
         script_alias = UtilFuncs._teradata_unquote_arg(UtilFuncs._extract_table_name(script_entry), quote='"')

@@ -530,6 +536,12 @@ class _TableOperatorUtils:
         # User can provide the installation location of previous version in case they are not using the latest.
         # This fix is done for ELE-5958 - https://teradata-pe.atlassian.net/browse/ELE-5958

+        # If operation is map_row or map_partition, then it should validate the python and
+        # 'dill' package version mismatch.
+        if self.operation in ["map_row", "map_partition"]:
+            self._validate_version = True
+            self.packages = ['dill']
+
         from teradataml.table_operators.Script import Script
         table_op_obj = Script(data=self.data,
                               script_name=self.script_name,
@@ -546,7 +558,10 @@ class _TableOperatorUtils:
                               nulls_first=self.nulls_first,
                               charset=self.charset,
                               data_partition_column=self.data_partition_column,
-                              data_hash_column=self.data_hash_column
+                              data_hash_column=self.data_hash_column,
+                              _validate_version = self._validate_version,
+                              _func_name = self.operation,
+                              _packages = self.packages
                               )
         table_op_obj.check_reserved_keyword = check_reserved_keyword

@@ -588,6 +603,12 @@ class _TableOperatorUtils:
         EXAMPLES:
             return_obj = self.__execute_apply_table_operator()
         """
+
+        # If operation is apply, then it should validate the python and 'dill' package version mismatch.
+        if self.operation == "apply":
+            self._validate_version = True
+            self.packages = ['dill']
+
         # First create Apply Table operator object so that validations are done on inputs.
         from teradataml.table_operators.Apply import Apply
         apply_op_obj = Apply(data=self.data,
@@ -604,7 +625,10 @@ class _TableOperatorUtils:
                              nulls_first=self.nulls_first,
                              data_partition_column=self.data_partition_column,
                              data_hash_column=self.data_hash_column,
-                             style=self.style
+                             style=self.style,
+                             _validate_version=self._validate_version,
+                             _func_name = self.operation,
+                             _packages = self.packages
                              )

         # APPLY operator requires installation and deletion of script file.
@@ -670,8 +694,33 @@ class _TableOperatorUtils:
                               TableOperatorConstants.MAP_PARTITION_OP.value,
                               TableOperatorConstants.APPLY_OP.value,
                               TableOperatorConstants.UDF_OP.value]:
-            GarbageCollector._delete_object_entry(
-                object_to_delete=self.script_entry,
-                object_type=TeradataConstants.TERADATA_SCRIPT,
-                remove_entry_from_gc_list=True
-            )
+            if self.debug:
+                self._print_script_path()
+                self.is_printed = True
+            else:
+                GarbageCollector._delete_object_entry(
+                    object_to_delete=self.script_entry,
+                    object_type=TeradataConstants.TERADATA_SCRIPT,
+                    remove_entry_from_gc_list=True
+                )
+    def _print_script_path(self):
+        """
+        DESCRIPTION:
+            Internal function to print the path of the script file.
+
+        PARAMETERS:
+            None.
+
+        RAISES:
+            None.
+
+        RETURNS:
+            None.
+        """
+        mssg1 = f"Path for the script {self.script_path}"
+        mssg2 = "The user should delete the script file since it is not being garbage collected."
+        mssg_len = max(len(mssg1), len(mssg2))
+        print("-" * mssg_len)
+        print(mssg1)
+        print(mssg2)
+        print("-" * mssg_len)
teradataml/utils/dtypes.py
CHANGED
@@ -8,6 +8,7 @@ from teradatasqlalchemy import (INTERVAL_YEAR, INTERVAL_YEAR_TO_MONTH, INTERVAL_
                                 INTERVAL_HOUR, INTERVAL_HOUR_TO_MINUTE, INTERVAL_HOUR_TO_SECOND,
                                 INTERVAL_MINUTE, INTERVAL_MINUTE_TO_SECOND, INTERVAL_SECOND)
 from teradatasqlalchemy import (GEOMETRY, MBB, MBR)
+from teradatasqlalchemy import VECTOR
 from teradataml.common.td_coltype_code_to_tdtype import HELP_COL_TYPE_TO_TDTYPE
 from teradataml.common.constants import TeradataTypes, PythonTypes
 from datetime import datetime, time, date
@@ -148,7 +149,7 @@ _GET_DATATYPES = {
                  INTERVAL_DAY_TO_SECOND, INTERVAL_HOUR, INTERVAL_HOUR_TO_MINUTE, INTERVAL_HOUR_TO_SECOND,
                  INTERVAL_MINUTE, INTERVAL_MINUTE_TO_SECOND, INTERVAL_MONTH, INTERVAL_SECOND, INTERVAL_YEAR,
                  INTERVAL_YEAR_TO_MONTH, PERIOD_DATE, PERIOD_TIME, PERIOD_TIMESTAMP, TIME, TIMESTAMP, VARBYTE,
-                 VARCHAR, GEOMETRY, MBB, MBR],
+                 VARCHAR, GEOMETRY, MBB, MBR, VECTOR],
     'NON_NUM_DATE_INTERVAL': [BLOB, BYTE, CHAR, CLOB, PERIOD_DATE, PERIOD_TIME, PERIOD_TIMESTAMP, TIME, TIMESTAMP,
                               VARBYTE, VARCHAR, GEOMETRY, MBB, MBR],
     'NON_NUM_INTERVAL': [BLOB, BYTE, CHAR, CLOB, DATE, PERIOD_DATE, PERIOD_TIME, PERIOD_TIMESTAMP, TIME, TIMESTAMP,
@@ -293,10 +294,57 @@ class _DtypesMappers:
                VARCHAR: lambda x: "{0},{1}".format(x.__class__.__name__, x.length)
                }

+    # Holds mapping between string representation of teradatasqlalchemy type
+    # and actual teradatasqlalchemy type.
+    DATALAKE_STR_to_TDSQLALCHEMY_DATATYPE_MAPPER = {
+        "CHAR": CHAR,
+        "VARCHAR": VARCHAR,
+
+        "BYTEINT": BYTEINT,
+        "SMALLINT": SMALLINT,
+        "INTEGER": INTEGER,
+        "BIGINT": BIGINT,
+
+        "REAL": FLOAT,
+        "FLOAT": FLOAT,
+        "DOUBLE": FLOAT,
+        "DECIMAL": DECIMAL,
+        "NUMBER": NUMBER,
+
+        "DATE": DATE,
+        "TIME": TIME,
+        "TIMESTAMP": TIMESTAMP,
+        "TIMESTAMP_WTZ": TIMESTAMP,
+
+        "BYTE": BYTE,
+        "VARBYTE": VARBYTE,
+        "BLOB": BLOB,
+        # TODO: Add CLOB type when support is added from OTF.
+
+        # TODO: Check these types when corresponding data type support
+        # is available from OTF support or not.
+        "INTERVAL_YEAR": INTERVAL_YEAR,
+        "INTERVAL_YTM": INTERVAL_YEAR_TO_MONTH,
+        "INTERVAL_MONTH": INTERVAL_MONTH,
+        "INTERVAL_DAY": INTERVAL_DAY,
+
+        "INTERVAL_DTH": INTERVAL_DAY_TO_HOUR,
+        "INTERVAL_DTM": INTERVAL_DAY_TO_MINUTE,
+        "INTERVAL_DTS": INTERVAL_DAY_TO_SECOND,
+        "INTERVAL_HOUR": INTERVAL_HOUR,
+        "INTERVAL_HTM": INTERVAL_HOUR_TO_MINUTE,
+        "INTERVAL_HTS": INTERVAL_HOUR_TO_SECOND,
+        "INTERVAL_MINUTE": INTERVAL_MINUTE,
+        "INTERVAL_MTS": INTERVAL_MINUTE_TO_SECOND,
+        "INTERVAL_SECOND": INTERVAL_SECOND
+    }
+
+
 class _SuppArgTypes:
     VAL_ARG_DATATYPE = (str, BIGINT, BYTEINT, CHAR, DATE, DECIMAL, FLOAT, INTEGER, NUMBER, SMALLINT, TIME,
                         TIMESTAMP, VARCHAR)

+
 class _Dtypes:

     @staticmethod
teradataml/utils/internal_buffer.py
CHANGED

@@ -82,3 +82,41 @@ class _InternalBuffer:
         """
         if key in cls.__data:
             return cls.__data.get(key)
+
+    @classmethod
+    def remove_key(cls, key):
+        """
+        DESCRIPTION:
+            Remove a particular key from the internal buffer.
+
+        RETURNS:
+            None
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            # Remove key "vs_session_id" from _InternalBuffer.
+            >>> _InternalBuffer.remove_key("vs_session_id")
+        """
+        del cls.__data[key]
+
+    @classmethod
+    def remove_keys(cls, keys):
+        """
+        DESCRIPTION:
+            Removes specified keys from the internal buffer.
+
+        RETURNS:
+            None
+
+        RAISES:
+            None
+
+        EXAMPLES:
+            # Remove keys "list_base_envs" and "default_base_env" from _InternalBuffer.
+            >>> _InternalBuffer.remove_keys(['list_base_envs', 'default_base_env'])
+        """
+        for key in keys:
+            if cls.__data.get(key) is not None:
+                del cls.__data[key]