teradataml 20.0.0.6__py3-none-any.whl → 20.0.0.7__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
- teradataml/README.md +210 -0
- teradataml/__init__.py +1 -1
- teradataml/_version.py +1 -1
- teradataml/analytics/analytic_function_executor.py +162 -76
- teradataml/analytics/byom/__init__.py +1 -1
- teradataml/analytics/json_parser/__init__.py +2 -0
- teradataml/analytics/json_parser/analytic_functions_argument.py +95 -2
- teradataml/analytics/json_parser/metadata.py +22 -4
- teradataml/analytics/sqle/DecisionTreePredict.py +3 -2
- teradataml/analytics/sqle/NaiveBayesPredict.py +3 -2
- teradataml/analytics/sqle/__init__.py +3 -0
- teradataml/analytics/utils.py +4 -1
- teradataml/automl/__init__.py +2369 -464
- teradataml/automl/autodataprep/__init__.py +15 -0
- teradataml/automl/custom_json_utils.py +184 -112
- teradataml/automl/data_preparation.py +113 -58
- teradataml/automl/data_transformation.py +154 -53
- teradataml/automl/feature_engineering.py +113 -53
- teradataml/automl/feature_exploration.py +548 -25
- teradataml/automl/model_evaluation.py +260 -32
- teradataml/automl/model_training.py +399 -206
- teradataml/clients/auth_client.py +2 -2
- teradataml/common/aed_utils.py +11 -2
- teradataml/common/bulk_exposed_utils.py +4 -2
- teradataml/common/constants.py +62 -2
- teradataml/common/garbagecollector.py +50 -21
- teradataml/common/messagecodes.py +47 -2
- teradataml/common/messages.py +19 -1
- teradataml/common/sqlbundle.py +23 -6
- teradataml/common/utils.py +116 -10
- teradataml/context/aed_context.py +16 -10
- teradataml/data/Employee.csv +5 -0
- teradataml/data/Employee_Address.csv +4 -0
- teradataml/data/Employee_roles.csv +5 -0
- teradataml/data/JulesBelvezeDummyData.csv +100 -0
- teradataml/data/byom_example.json +5 -0
- teradataml/data/creditcard_data.csv +284618 -0
- teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
- teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +1 -1
- teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
- teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +3 -7
- teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +3 -7
- teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
- teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
- teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
- teradataml/data/load_example_data.py +29 -11
- teradataml/data/payment_fraud_dataset.csv +10001 -0
- teradataml/data/teradataml_example.json +67 -0
- teradataml/dataframe/copy_to.py +714 -54
- teradataml/dataframe/dataframe.py +1153 -33
- teradataml/dataframe/dataframe_utils.py +8 -3
- teradataml/dataframe/functions.py +168 -1
- teradataml/dataframe/setop.py +4 -1
- teradataml/dataframe/sql.py +141 -9
- teradataml/dbutils/dbutils.py +470 -35
- teradataml/dbutils/filemgr.py +1 -1
- teradataml/hyperparameter_tuner/optimizer.py +456 -142
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/lib/libaed_0_1.dylib +0 -0
- teradataml/lib/libaed_0_1.so +0 -0
- teradataml/lib/libaed_0_1_aarch64.so +0 -0
- teradataml/scriptmgmt/UserEnv.py +234 -34
- teradataml/scriptmgmt/lls_utils.py +43 -17
- teradataml/sdk/_json_parser.py +1 -1
- teradataml/sdk/api_client.py +9 -6
- teradataml/sdk/modelops/_client.py +3 -0
- teradataml/series/series.py +12 -7
- teradataml/store/feature_store/constants.py +601 -234
- teradataml/store/feature_store/feature_store.py +2886 -616
- teradataml/store/feature_store/mind_map.py +639 -0
- teradataml/store/feature_store/models.py +5831 -214
- teradataml/store/feature_store/utils.py +390 -0
- teradataml/table_operators/table_operator_util.py +1 -1
- teradataml/table_operators/templates/dataframe_register.template +6 -2
- teradataml/table_operators/templates/dataframe_udf.template +6 -2
- teradataml/utils/docstring.py +527 -0
- teradataml/utils/dtypes.py +93 -0
- teradataml/utils/internal_buffer.py +2 -2
- teradataml/utils/utils.py +41 -2
- teradataml/utils/validators.py +694 -17
- {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/METADATA +213 -2
- {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/RECORD +96 -81
- {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/zip-safe +0 -0
teradataml/dataframe/dataframe_utils.py
CHANGED

@@ -1796,7 +1796,8 @@ class DataFrameUtils():
         fil_nodeid = aed_utils._aed_filter(sel_nodeid, filter_str)
         sel2_nodeid = aed_utils._aed_select(fil_nodeid, sel_cols_str)
         col_names, col_types = __class__._get_column_names_and_types_from_metaexpr(df._metaexpr)
-        new_metaexpr = UtilFuncs._get_metaexpr_using_columns(df._nodeid, zip(col_names, col_types))
+        new_metaexpr = UtilFuncs._get_metaexpr_using_columns(df._nodeid, zip(col_names, col_types),
+                                                             datalake=df._metaexpr.datalake)
         # Call the function from_node from appropriate class either DataFrame or GeoDataFrame
         new_df = df.__class__._from_node(sel2_nodeid, new_metaexpr, df._index_label)
         new_df._orderby = df._orderby

@@ -1855,6 +1856,8 @@ class DataFrameUtils():

         db_schema = UtilFuncs._extract_db_name(tab_name_first)
         db_table_name = UtilFuncs._extract_table_name(tab_name_first)
+        if dfs[0]._metaexpr.datalake:
+            return DataFrame(in_schema(db_schema, db_table_name, dfs[0]._metaexpr.datalake))

         if db_schema:
             return DataFrame(in_schema(db_schema, db_table_name))

@@ -1875,7 +1878,9 @@ class DataFrameUtils():
         db_schema = UtilFuncs._extract_db_name(tab_name_first)
         db_table_name = UtilFuncs._extract_table_name(tab_name_first)

-        if db_schema:
+        if dfs[i]._metaexpr.datalake:
+            parent_df = DataFrame(in_schema(db_schema, db_table_name, dfs[i]._metaexpr.datalake))
+        elif db_schema:
             parent_df = DataFrame(in_schema(db_schema, db_table_name))
         else:
             parent_df = DataFrame(db_table_name)
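The three hunks above thread the source DataFrame's datalake qualifier into every derived _metaexpr, so DataFrames built on datalake (OTF) tables no longer fall back to the default database. A hedged sketch of the user-visible effect; the datalake, database, table, and column names below are placeholders, not shipped examples:

    # Placeholder names: "mydatalake", "sales_db", "sales_otf", "txn_id", "amount".
    from teradataml import DataFrame, in_schema

    # Same three-argument in_schema() call the hunks above use internally.
    df = DataFrame(in_schema("sales_db", "sales_otf", "mydatalake"))

    # Derived DataFrames are rebuilt through
    # UtilFuncs._get_metaexpr_using_columns(..., datalake=df._metaexpr.datalake),
    # so the selection below stays bound to "mydatalake".
    subset = df.select(["txn_id", "amount"])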
@@ -1919,7 +1924,7 @@ class DataFrameUtils():
         # 2. Comma separated parameters enclosed in parentheses
         # 3. Comma separated parameters without parenthesis
         # 4. Remaining string
-        pattern = "([A-Z0-9_]+)(\((.*)\))?(.*)"
+        pattern = r"([A-Z0-9_]+)(\((.*)\))?(.*)"

         m = re.match(pattern, td_type)
         td_str_type = m.group(1)
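The only change here is the r-prefix: `\(` and `\)` are not valid string escape sequences, and recent Python versions flag them (SyntaxWarning since 3.12) unless the pattern is written as a raw string; the same fix is applied to the URL-parsing regexes in sql.py further down. A small standalone illustration of the same pattern; the "DECIMAL(10,2)" input is illustrative only:

    import re

    # The raw string keeps "\(" and "\)" as regex tokens rather than string escapes.
    pattern = r"([A-Z0-9_]+)(\((.*)\))?(.*)"

    m = re.match(pattern, "DECIMAL(10,2)")
    print(m.group(1), m.group(3))   # -> DECIMAL 10,2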
teradataml/dataframe/functions.py
CHANGED

@@ -8,7 +8,7 @@ from teradataml.utils.utils import execute_sql
 import teradatasqlalchemy as tdsqlalchemy
 from teradataml.utils.validators import _Validators
 from teradataml.dataframe.sql import _SQLColumnExpression
-from teradatasqlalchemy import VARCHAR, CLOB, CHAR
+from teradatasqlalchemy import VARCHAR, CLOB, CHAR, DATE, TIMESTAMP
 from teradataml.common.constants import TableOperatorConstants, TeradataConstants, TeradataTypes
 from teradataml.common.utils import UtilFuncs
 from teradataml.dataframe.sql_interfaces import ColumnExpression

@@ -17,6 +17,7 @@ from teradataml.common.exceptions import TeradataMlException
 from teradataml.common.messages import Messages
 from teradataml.common.messagecodes import MessageCodes
 from teradataml.scriptmgmt.lls_utils import get_env
+from sqlalchemy import literal_column

 def udf(user_function=None, returns=VARCHAR(1024), env_name = None, delimiter=',', quotechar=None, debug=False):
     """

@@ -31,6 +32,8 @@ def udf(user_function=None, returns=VARCHAR(1024), env_name = None, delimiter=',
            packages should be inside the user defined function itself.
        3. Do not call a regular function defined outside the udf() from the user defined function.
           The function definition and call must be inside the udf(). Look at Example 9 to understand more.
+       4. One can use the `td_buffer` to cache the data in the user defined function.
+          Look at Example 10 to understand more.

     PARAMETERS:
         user_function:

@@ -321,6 +324,56 @@ def udf(user_function=None, returns=VARCHAR(1024), env_name = None, delimiter=',
        Alpha Co    210.0  200.0  215.0  250.0  17/01/04    2021-10-06
        Red Inc     200.0  150.0  140.0    NaN  17/01/04    2021-10-06
        >>>
+
+       # Example 10: Define a user defined function 'sentiment_analysis' to perform
+       #             sentiment analysis on the 'review' column using VADER.
+       # Note - Cache the model in UDF using 'td_buffer' to avoid loading
+       #        the model every time the UDF is called.
+
+       # Load the data to run the example.
+       >>> from teradataml import *
+       >>> load_example_data("sentimentextractor", "sentiment_extract_input")
+       >>> df = DataFrame("sentiment_extract_input")
+
+       # Create the environment and install the required library.
+       >>> env = create_env('text_analysis', 'python_3.10', 'Test environment for UDF')
+       >>> env.install_lib('vaderSentiment')
+
+       # Create a user defined function to perform sentiment analysis.
+       >>> from teradatasqlalchemy.types import VARCHAR
+       >>> @udf(env_name = env, returns = VARCHAR(80), delimiter='|')
+       ... def sentiment_analysis(txt):
+       ...     if 'vader_model' not in td_buffer:
+       ...         from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
+       ...         td_buffer['vader_model'] = SentimentIntensityAnalyzer()
+       ...     sid_obj = td_buffer['vader_model']
+       ...
+       ...     sentiment_dict = sid_obj.polarity_scores(txt)
+       ...     if sentiment_dict['compound'] >= 0.05 :
+       ...         sentiment = "Positive"
+       ...     elif sentiment_dict['compound'] <= - 0.05 :
+       ...         sentiment = "Negative"
+       ...     else :
+       ...         sentiment = "Neutral"
+       ...     return sentiment
+
+       # Assign the Column Expression returned by user defined function
+       # to the DataFrame.
+       >>> res = df.assign(sentiment = sentiment_analysis('review'))
+       >>> res = res.select(["id", "product", "sentiment"])
+       >>> res
+          id       product  sentiment
+       0   5           gps   Positive
+       1   9    television   Negative
+       2   8        camera   Negative
+       3  10        camera   Negative
+       4   1        camera   Positive
+       5   4           gps   Positive
+       6   2  office suite   Positive
+       7   7           gps   Negative
+       8   6           gps   Negative
+       9   3        camera   Positive
+       >>>
     """

     allowed_datatypes = TeradataTypes.TD_ALL_TYPES.value
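The new note and Example 10 describe td_buffer, a dict-like buffer available inside the UDF that persists between calls so expensive setup runs only once. A minimal sketch of just that caching pattern, assuming the behaviour shown in Example 10; load_model() is a placeholder, not a teradataml or VADER API:

    from teradatasqlalchemy.types import VARCHAR

    @udf(returns=VARCHAR(100))
    def score(txt):
        # Build the costly object only on the first call seen by this worker,
        # then reuse it from td_buffer for every subsequent row.
        if 'model' not in td_buffer:
            td_buffer['model'] = load_model()   # placeholder for any expensive setup
        return str(td_buffer['model'].predict(txt))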
@@ -981,6 +1034,120 @@ def td_range(start, end=None, step=1):
     df = DataFrame.from_query(range_query)
     return df

+def current_date(time_zone='local'):
+    """
+    DESCRIPTION:
+        Returns the current date based on the specified time zone.
+
+    PARAMETERS:
+        time_zone:
+            Optional Argument.
+            Specifies the time zone to use for retrieving the current date.
+            Permitted Values:
+                - "local": Uses the local time zone.
+                - Any valid time zone string.
+            Default Value: "local"
+            Types: str
+
+    RETURNS:
+        ColumnExpression.
+
+    RAISES:
+        None
+
+    EXAMPLES:
+        # Example 1: Add a new column to the DataFrame that contains the
+        #            current date as its value. Consider system specified
+        #            timezone as timezone.
+        >>> from teradataml.dataframe.functions import current_date
+        >>> load_example_data('dataframe', 'sales')
+        >>> df = DataFrame("sales")
+        >>> df.assign(current_date=current_date())
+            accounts    Feb    Jan   Mar   Apr    datetime current_date
+            Alpha Co  210.0  200.0   215   250  04/01/2017     25/05/27
+            Blue Inc   90.0     50    95   101  04/01/2017     25/05/27
+           Jones LLC  200.0    150   140   180  04/01/2017     25/05/27
+          Orange Inc  210.0   None  None   250  04/01/2017     25/05/27
+          Yellow Inc   90.0   None  None  None  04/01/2017     25/05/27
+             Red Inc  200.0    150   140  None  04/01/2017     25/05/27
+
+        # Example 2: Add a new column to the DataFrame that contains the
+        #            current date in a specific time zone as its value.
+        >>> from teradataml.dataframe.functions import current_date
+        >>> load_example_data('dataframe', 'sales')
+        >>> df = DataFrame("sales")
+        >>> df.assign(current_date=current_date("GMT"))
+            accounts    Feb    Jan   Mar   Apr    datetime current_date
+            Alpha Co  210.0  200.0   215   250  04/01/2017     25/05/27
+            Blue Inc   90.0     50    95   101  04/01/2017     25/05/27
+           Jones LLC  200.0    150   140   180  04/01/2017     25/05/27
+          Orange Inc  210.0   None  None   250  04/01/2017     25/05/27
+          Yellow Inc   90.0   None  None  None  04/01/2017     25/05/27
+             Red Inc  200.0    150   140  None  04/01/2017     25/05/27
+
+    """
+    if time_zone == "local":
+        expr_ = "CURRENT_DATE AT LOCAL"
+    else:
+        expr_ = "CURRENT_DATE AT TIME ZONE '{}'".format(time_zone)
+    return _SQLColumnExpression(literal_column(expr_), type = DATE())
+
+def current_timestamp(time_zone='local'):
+    """
+    DESCRIPTION:
+        Returns the current timestamp based on the specified time zone.
+
+    PARAMETERS:
+        time_zone:
+            Optional Argument.
+            Specifies the time zone to use for retrieving the current timestamp.
+            Permitted Values:
+                - "local": Uses the local time zone.
+                - Any valid time zone string.
+            Default Value: "local"
+            Types: str
+
+    RETURNS:
+        ColumnExpression.
+
+    RAISES:
+        None
+
+    EXAMPLES:
+        # Example 1: Assign the current timestamp in the local time zone to a DataFrame column.
+        >>> from teradataml.dataframe.functions import current_timestamp
+        >>> load_example_data('dataframe', 'sales')
+        >>> df = DataFrame("sales")
+        >>> df.assign(current_timestamp = current_timestamp())
+            accounts    Feb   Jan   Mar   Apr    datetime                 current_timestamp
+            Alpha Co  210.0   200   215   250  04/01/2017  2025-05-27 17:36:56.750000+00:00
+            Blue Inc   90.0    50    95   101  04/01/2017  2025-05-27 17:36:56.750000+00:00
+           Jones LLC  200.0   150   140   180  04/01/2017  2025-05-27 17:36:56.750000+00:00
+          Orange Inc  210.0  None  None   250  04/01/2017  2025-05-27 17:36:56.750000+00:00
+          Yellow Inc   90.0  None  None  None  04/01/2017  2025-05-27 17:36:56.750000+00:00
+             Red Inc  200.0   150   140  None  04/01/2017  2025-05-27 17:36:56.750000+00:00
+
+        # Example 2: Assign the current timestamp in a specific time zone to a DataFrame column.
+        >>> from teradataml.dataframe.functions import current_timestamp
+        >>> load_example_data('dataframe', 'sales')
+        >>> df = DataFrame("sales")
+        >>> df.assign(current_timestamp = current_timestamp("GMT+10"))
+            accounts    Feb   Jan   Mar   Apr    datetime                 current_timestamp
+            Blue Inc   90.0    50    95   101  04/01/2017  2025-05-28 03:39:00.790000+10:00
+             Red Inc  200.0   150   140  None  04/01/2017  2025-05-28 03:39:00.790000+10:00
+          Yellow Inc   90.0  None  None  None  04/01/2017  2025-05-28 03:39:00.790000+10:00
+           Jones LLC  200.0   150   140   180  04/01/2017  2025-05-28 03:39:00.790000+10:00
+          Orange Inc  210.0  None  None   250  04/01/2017  2025-05-28 03:39:00.790000+10:00
+            Alpha Co  210.0   200   215   250  04/01/2017  2025-05-28 03:39:00.790000+10:00
+
+    """
+
+    if time_zone == "local":
+        expr_ = "CURRENT_TIMESTAMP AT LOCAL"
+    else:
+        expr_ = "CURRENT_TIMESTAMP AT TIME ZONE '{}'".format(time_zone)
+    return _SQLColumnExpression(literal_column(expr_), type = TIMESTAMP())
+
 def get_formatters(formatter_type = None):
     """
     DESCRIPTION:
teradataml/dataframe/setop.py
CHANGED
@@ -19,6 +19,7 @@ from teradataml.common.utils import UtilFuncs
 from teradataml.dataframe import dataframe
 from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils
 from teradataml.common.aed_utils import AedUtils
+from teradataml.dataframe.sql import _MetaExpression
 from teradataml.utils.validators import _Validators
 from teradatasqlalchemy.dialect import dialect as td_dialect, TeradataTypeCompiler as td_type_compiler
 from teradatasqlalchemy import (GEOMETRY, MBR, MBB)

@@ -346,7 +347,9 @@ def __process_operation(meta_data, is_lazy, setop_type, nodeid, index_label, ind
             break

     # Constructing new Metadata (_metaexpr) without DB; using dummy nodeid and get new metaexpr for nodeid.
-    meta_data = UtilFuncs._get_metaexpr_using_columns(nodeid, column_info) if is_lazy else meta_data
+    meta_data = UtilFuncs._get_metaexpr_using_columns(nodeid, column_info,
+                                                      datalake=meta_data.datalake if isinstance(meta_data, _MetaExpression) else None) if is_lazy \
+        else meta_data

     if is_lazy:
         return getattr(module, class_name)._from_node(nodeid, meta_data, index_label)
teradataml/dataframe/sql.py
CHANGED
@@ -200,11 +200,10 @@ class _MetaExpression(object):
         RAISES:
             AttributeError if attribute can't be found
         """
-
-
-
+        try:
+            res = getattr(self.__t, key)
+        except AttributeError:
             raise AttributeError('Unable to find attribute: %s' % key)
-
         return res

     @property
@@ -501,7 +500,7 @@ class _SQLTableExpression(_PandasTableExpression):
         self.c = [_SQLColumnExpression(c) for c in table.c]

         self._n_rows = 0
-
+        self._datalake = kw.get('datalake', None)

     @property
     def c(self):

@@ -557,6 +556,13 @@ class _SQLTableExpression(_PandasTableExpression):

         self.__t = table

+    @property
+    def datalake(self):
+        """
+        Returns the underlying datalake information
+        """
+        return self._datalake
+
     def __repr__(self):
         """
         Returns a SELECT TOP string representing the underlying table.

@@ -10504,24 +10510,24 @@ class _SQLColumnExpression(_LogicalColumnExpression,
         # teradataml does not support regex grouping hence in some cases first used 'regex_replace' and
         # then 'regex_substr' or vice-versa.
         _part_to_extract_dict = {'HOST': _SQLColumnExpression(
-            func.regexp_replace(func.regexp_substr(self.expression, '//([^/?#]*)'), '(//[^/?#]+@)|(//)|(:\d+)', ''),
+            func.regexp_replace(func.regexp_substr(self.expression, '//([^/?#]*)'), r'(//[^/?#]+@)|(//)|(:\d+)', ''),
             type=VARCHAR()),
             'PATH': _SQLColumnExpression(func.regexp_substr(
                 func.regexp_replace(self.expression, '^(([^:/?#]+):)?(//([^/?#]*))?', ''),
                 '([^?#]*)'), type=VARCHAR()),
             'QUERY': _SQLColumnExpression(func.ltrim(func.regexp_substr(
                 func.regexp_replace(self.expression, '^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)', ''),
-                '\?([^#]*)'), '?'), type=VARCHAR()),
+                r'\?([^#]*)'), '?'), type=VARCHAR()),
             'REF': _SQLColumnExpression(func.ltrim(func.regexp_substr(
                 func.regexp_replace(self.expression,
-                    '^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?', ''),
+                    r'^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?', ''),
                 '(#(.*))'), '#'), type=VARCHAR()),
             'PROTOCOL': _SQLColumnExpression(
                 func.rtrim(func.regexp_substr(self.expression, '^(([^:/?#]+):)'), ':'),
                 type=VARCHAR()),
             'FILE': _SQLColumnExpression(func.regexp_substr(
                 func.regexp_replace(self.expression, '^(([^:/?#]+):)?(//([^/?#]*))?', ''),
-                '([^?#]*)(\?([^#]*))?'), type=VARCHAR()),
+                r'([^?#]*)(\?([^#]*))?'), type=VARCHAR()),
             'AUTHORITY': _SQLColumnExpression(
                 func.ltrim(func.regexp_substr(self.expression, '//([^/?#]*)'), '//'),
                 type=VARCHAR()),

@@ -10770,3 +10776,129 @@ class _SQLColumnExpression(_LogicalColumnExpression,

         """
         return _SQLColumnExpression(literal_column(f"TD_ISFINITE({self.compile()})"), type=INTEGER)
+
+    def between(self, lower, upper):
+        """
+        DESCRIPTION:
+            Evaluates whether the column value is between the lower and upper bounds.
+            The lower and upper bounds are inclusive.
+
+        PARAMETERS:
+            lower:
+                Required Argument.
+                Specifies the lower bound value.
+                Type: ColumnExpression or str or int or float
+
+            upper:
+                Required Argument.
+                Specifies the upper bound value.
+                Type: ColumnExpression or str or int or float
+
+        RETURNS:
+            ColumnExpression
+
+        EXAMPLES:
+            # Load the data to run the example.
+            >>> load_example_data("dataframe", "sales")
+            >>> df = DataFrame("sales")
+            >>> print(df)
+                          Feb    Jan    Mar    Apr    datetime
+            accounts
+            Blue Inc     90.0   50.0   95.0  101.0  04/01/2017
+            Alpha Co    210.0  200.0  215.0  250.0  04/01/2017
+            Jones LLC   200.0  150.0  140.0  180.0  04/01/2017
+            Yellow Inc   90.0    NaN    NaN    NaN  04/01/2017
+            Red Inc     200.0  150.0  140.0    NaN  04/01/2017
+            Orange Inc  210.0    NaN    NaN  250.0  04/01/2017
+
+            # Example 1: Check if column 'Feb' is between 100 and 200.
+            >>> new_df = df[df.Feb.between(100, 200)]
+            >>> print(new_df)
+                         Feb  Jan  Mar    Apr    datetime
+            accounts
+            Jones LLC  200.0  150  140  180.0  04/01/2017
+            Red Inc    200.0  150  140    NaN  04/01/2017
+
+            # Example 2: Check if column 'datetime' is between '01-01-2017' and '30-01-2017'.
+            >>> new_df = df[df.datetime.between('01-01-2017', '30-01-2017')]
+            >>> print(new_df)
+                          Feb    Jan    Mar    Apr    datetime
+            accounts
+            Jones LLC   200.0  150.0  140.0  180.0  04/01/2017
+            Blue Inc     90.0   50.0   95.0  101.0  04/01/2017
+            Yellow Inc   90.0    NaN    NaN    NaN  04/01/2017
+            Red Inc     200.0  150.0  140.0    NaN  04/01/2017
+            Alpha Co    210.0  200.0  215.0  250.0  04/01/2017
+            Orange Inc  210.0    NaN    NaN  250.0  04/01/2017
+        """
+        return _SQLColumnExpression(self.expression.between(lower, upper))
+
+    def begin(self):
+        """
+        DESCRIPTION:
+            Retrieves the beginning date or timestamp from a PERIOD column.
+
+        PARAMETERS:
+            None.
+
+        RETURNS:
+            ColumnExpression.
+
+        RAISES:
+            TeradataMlException.
+
+        EXAMPLES:
+            # Load the data to run the example.
+            >>> load_example_data("teradataml", "Employee_roles")
+
+            # Create a DataFrame on 'employee_roles' table.
+            >>> df = DataFrame("employee_roles")
+
+            # Extract the starting date from the period column 'role_validity_period'
+            # and assign it to a new column.
+            >>> df = df.assign(start_date_col = df['role_validity_period'].begin())
+               EmployeeID EmployeeName Department   Salary      role_validity_period start_date_col
+                        1     John Doe         IT  100.000  ('20/01/01', '24/12/31')       20/01/01
+                        2   Jane Smith         DA  200.000  ('20/01/01', '99/12/31')       20/01/01
+                        3          Bob  Marketing  330.000  ('25/01/01', '99/12/31')       25/01/01
+                        3          Bob      Sales  300.000  ('24/01/01', '24/12/31')       24/01/01
+
+        """
+        _Validators._validate_period_column_type(self._type)
+        element_type = DATE if isinstance(self._type, PERIOD_DATE) else TIMESTAMP
+        return _SQLColumnExpression(literal_column(f"BEGIN({self.compile()})"), type = element_type)
+
+    def end(self):
+        """
+        DESCRIPTION:
+            Retrieves the ending date or timestamp from a PERIOD column.
+
+        PARAMETERS:
+            None.
+
+        RETURNS:
+            ColumnExpression.
+
+        RAISES:
+            TeradataMlException.
+
+        EXAMPLES:
+            # Load the data to run the example.
+            >>> load_example_data("teradataml", "Employee_roles")
+
+            # Create a DataFrame on 'employee_roles' table.
+            >>> df = DataFrame("employee_roles")
+
+            # Extract the ending date from the period column 'role_validity_period'
+            # and assign it to a new column.
+            >>> df = df.assign(end_date_col = df['role_validity_period'].end())
+               EmployeeID EmployeeName Department   Salary      role_validity_period end_date_col
+                        1     John Doe         IT  100.000  ('20/01/01', '24/12/31')     24/12/31
+                        2   Jane Smith         DA  200.000  ('20/01/01', '99/12/31')     99/12/31
+                        3          Bob  Marketing  330.000  ('25/01/01', '99/12/31')     99/12/31
+                        3          Bob      Sales  300.000  ('24/01/01', '24/12/31')     24/12/31
+
+        """
+        _Validators._validate_period_column_type(self._type)
+        element_type = DATE if isinstance(self._type, PERIOD_DATE) else TIMESTAMP
+        return _SQLColumnExpression(literal_column(f"END({self.compile()})"), type = element_type)