teradataml 20.0.0.6__py3-none-any.whl → 20.0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic; see the registry advisory page for more details.

Files changed (96)
  1. teradataml/README.md +210 -0
  2. teradataml/__init__.py +1 -1
  3. teradataml/_version.py +1 -1
  4. teradataml/analytics/analytic_function_executor.py +162 -76
  5. teradataml/analytics/byom/__init__.py +1 -1
  6. teradataml/analytics/json_parser/__init__.py +2 -0
  7. teradataml/analytics/json_parser/analytic_functions_argument.py +95 -2
  8. teradataml/analytics/json_parser/metadata.py +22 -4
  9. teradataml/analytics/sqle/DecisionTreePredict.py +3 -2
  10. teradataml/analytics/sqle/NaiveBayesPredict.py +3 -2
  11. teradataml/analytics/sqle/__init__.py +3 -0
  12. teradataml/analytics/utils.py +4 -1
  13. teradataml/automl/__init__.py +2369 -464
  14. teradataml/automl/autodataprep/__init__.py +15 -0
  15. teradataml/automl/custom_json_utils.py +184 -112
  16. teradataml/automl/data_preparation.py +113 -58
  17. teradataml/automl/data_transformation.py +154 -53
  18. teradataml/automl/feature_engineering.py +113 -53
  19. teradataml/automl/feature_exploration.py +548 -25
  20. teradataml/automl/model_evaluation.py +260 -32
  21. teradataml/automl/model_training.py +399 -206
  22. teradataml/clients/auth_client.py +2 -2
  23. teradataml/common/aed_utils.py +11 -2
  24. teradataml/common/bulk_exposed_utils.py +4 -2
  25. teradataml/common/constants.py +62 -2
  26. teradataml/common/garbagecollector.py +50 -21
  27. teradataml/common/messagecodes.py +47 -2
  28. teradataml/common/messages.py +19 -1
  29. teradataml/common/sqlbundle.py +23 -6
  30. teradataml/common/utils.py +116 -10
  31. teradataml/context/aed_context.py +16 -10
  32. teradataml/data/Employee.csv +5 -0
  33. teradataml/data/Employee_Address.csv +4 -0
  34. teradataml/data/Employee_roles.csv +5 -0
  35. teradataml/data/JulesBelvezeDummyData.csv +100 -0
  36. teradataml/data/byom_example.json +5 -0
  37. teradataml/data/creditcard_data.csv +284618 -0
  38. teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
  39. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +1 -1
  40. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +1 -1
  41. teradataml/data/docs/sqle/docs_17_20/TextParser.py +1 -1
  42. teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
  43. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +3 -7
  44. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +3 -7
  45. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +3 -7
  46. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +3 -7
  47. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +3 -7
  48. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +3 -7
  49. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +3 -7
  50. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +3 -7
  51. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +3 -7
  52. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +3 -7
  53. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +3 -7
  54. teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
  55. teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
  56. teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
  57. teradataml/data/load_example_data.py +29 -11
  58. teradataml/data/payment_fraud_dataset.csv +10001 -0
  59. teradataml/data/teradataml_example.json +67 -0
  60. teradataml/dataframe/copy_to.py +714 -54
  61. teradataml/dataframe/dataframe.py +1153 -33
  62. teradataml/dataframe/dataframe_utils.py +8 -3
  63. teradataml/dataframe/functions.py +168 -1
  64. teradataml/dataframe/setop.py +4 -1
  65. teradataml/dataframe/sql.py +141 -9
  66. teradataml/dbutils/dbutils.py +470 -35
  67. teradataml/dbutils/filemgr.py +1 -1
  68. teradataml/hyperparameter_tuner/optimizer.py +456 -142
  69. teradataml/lib/aed_0_1.dll +0 -0
  70. teradataml/lib/libaed_0_1.dylib +0 -0
  71. teradataml/lib/libaed_0_1.so +0 -0
  72. teradataml/lib/libaed_0_1_aarch64.so +0 -0
  73. teradataml/scriptmgmt/UserEnv.py +234 -34
  74. teradataml/scriptmgmt/lls_utils.py +43 -17
  75. teradataml/sdk/_json_parser.py +1 -1
  76. teradataml/sdk/api_client.py +9 -6
  77. teradataml/sdk/modelops/_client.py +3 -0
  78. teradataml/series/series.py +12 -7
  79. teradataml/store/feature_store/constants.py +601 -234
  80. teradataml/store/feature_store/feature_store.py +2886 -616
  81. teradataml/store/feature_store/mind_map.py +639 -0
  82. teradataml/store/feature_store/models.py +5831 -214
  83. teradataml/store/feature_store/utils.py +390 -0
  84. teradataml/table_operators/table_operator_util.py +1 -1
  85. teradataml/table_operators/templates/dataframe_register.template +6 -2
  86. teradataml/table_operators/templates/dataframe_udf.template +6 -2
  87. teradataml/utils/docstring.py +527 -0
  88. teradataml/utils/dtypes.py +93 -0
  89. teradataml/utils/internal_buffer.py +2 -2
  90. teradataml/utils/utils.py +41 -2
  91. teradataml/utils/validators.py +694 -17
  92. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/METADATA +213 -2
  93. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/RECORD +96 -81
  94. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/WHEEL +0 -0
  95. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/top_level.txt +0 -0
  96. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/zip-safe +0 -0
@@ -1796,7 +1796,8 @@ class DataFrameUtils():
1796
1796
  fil_nodeid = aed_utils._aed_filter(sel_nodeid, filter_str)
1797
1797
  sel2_nodeid = aed_utils._aed_select(fil_nodeid, sel_cols_str)
1798
1798
  col_names, col_types = __class__._get_column_names_and_types_from_metaexpr(df._metaexpr)
1799
- new_metaexpr = UtilFuncs._get_metaexpr_using_columns(df._nodeid, zip(col_names, col_types))
1799
+ new_metaexpr = UtilFuncs._get_metaexpr_using_columns(df._nodeid, zip(col_names, col_types),
1800
+ datalake=df._metaexpr.datalake)
1800
1801
  # Call the function from_node from appropriate class either DataFrame or GeoDataFrame
1801
1802
  new_df = df.__class__._from_node(sel2_nodeid, new_metaexpr, df._index_label)
1802
1803
  new_df._orderby = df._orderby
@@ -1855,6 +1856,8 @@ class DataFrameUtils():
1855
1856
 
1856
1857
  db_schema = UtilFuncs._extract_db_name(tab_name_first)
1857
1858
  db_table_name = UtilFuncs._extract_table_name(tab_name_first)
1859
+ if dfs[0]._metaexpr.datalake:
1860
+ return DataFrame(in_schema(db_schema, db_table_name, dfs[0]._metaexpr.datalake))
1858
1861
 
1859
1862
  if db_schema:
1860
1863
  return DataFrame(in_schema(db_schema, db_table_name))
@@ -1875,7 +1878,9 @@ class DataFrameUtils():
1875
1878
  db_schema = UtilFuncs._extract_db_name(tab_name_first)
1876
1879
  db_table_name = UtilFuncs._extract_table_name(tab_name_first)
1877
1880
 
1878
- if db_schema:
1881
+ if dfs[i]._metaexpr.datalake:
1882
+ parent_df = DataFrame(in_schema(db_schema, db_table_name, dfs[i]._metaexpr.datalake))
1883
+ elif db_schema:
1879
1884
  parent_df = DataFrame(in_schema(db_schema, db_table_name))
1880
1885
  else:
1881
1886
  parent_df = DataFrame(db_table_name)
@@ -1919,7 +1924,7 @@ class DataFrameUtils():
1919
1924
  # 2. Comma separated parameters enclosed in parentheses
1920
1925
  # 3. Comma separated parameters without parenthesis
1921
1926
  # 4. Remaining string
1922
- pattern = "([A-Z0-9_]+)(\((.*)\))?(.*)"
1927
+ pattern = r"([A-Z0-9_]+)(\((.*)\))?(.*)"
1923
1928
 
1924
1929
  m = re.match(pattern, td_type)
1925
1930
  td_str_type = m.group(1)
@@ -8,7 +8,7 @@ from teradataml.utils.utils import execute_sql
8
8
  import teradatasqlalchemy as tdsqlalchemy
9
9
  from teradataml.utils.validators import _Validators
10
10
  from teradataml.dataframe.sql import _SQLColumnExpression
11
- from teradatasqlalchemy import VARCHAR, CLOB, CHAR
11
+ from teradatasqlalchemy import VARCHAR, CLOB, CHAR, DATE, TIMESTAMP
12
12
  from teradataml.common.constants import TableOperatorConstants, TeradataConstants, TeradataTypes
13
13
  from teradataml.common.utils import UtilFuncs
14
14
  from teradataml.dataframe.sql_interfaces import ColumnExpression
@@ -17,6 +17,7 @@ from teradataml.common.exceptions import TeradataMlException
17
17
  from teradataml.common.messages import Messages
18
18
  from teradataml.common.messagecodes import MessageCodes
19
19
  from teradataml.scriptmgmt.lls_utils import get_env
20
+ from sqlalchemy import literal_column
20
21
 
21
22
  def udf(user_function=None, returns=VARCHAR(1024), env_name = None, delimiter=',', quotechar=None, debug=False):
22
23
  """
@@ -31,6 +32,8 @@ def udf(user_function=None, returns=VARCHAR(1024), env_name = None, delimiter=',
31
32
  packages should be inside the user defined function itself.
32
33
  3. Do not call a regular function defined outside the udf() from the user defined function.
33
34
  The function definition and call must be inside the udf(). Look at Example 9 to understand more.
35
+ 4. One can use the `td_buffer` to cache the data in the user defined function.
36
+ Look at Example 10 to understand more.
34
37
 
35
38
  PARAMETERS:
36
39
  user_function:
@@ -321,6 +324,56 @@ def udf(user_function=None, returns=VARCHAR(1024), env_name = None, delimiter=',
321
324
  Alpha Co 210.0 200.0 215.0 250.0 17/01/04 2021-10-06
322
325
  Red Inc 200.0 150.0 140.0 NaN 17/01/04 2021-10-06
323
326
  >>>
327
+
328
+ # Example 10: Define a user defined function 'sentiment_analysis' to perform
329
+ # sentiment analysis on the 'review' column using VADER.
330
+ # Note - Cache the model in UDF using 'td_buffer' to avoid loading
331
+ # the model every time the UDF is called.
332
+
333
+ # Load the data to run the example.
334
+ >>> from teradataml import *
335
+ >>> load_example_data("sentimentextractor", "sentiment_extract_input")
336
+ >>> df = DataFrame("sentiment_extract_input")
337
+
338
+ # Create the environment and install the required library.
339
+ >>> env = create_env('text_analysis', 'python_3.10', 'Test environment for UDF')
340
+ >>> env.install_lib('vaderSentiment')
341
+
342
+ # Create a user defined function to perform sentiment analysis.
343
+ >>> from teradatasqlalchemy.types import VARCHAR
344
+ >>> @udf(env_name = env, returns = VARCHAR(80), delimiter='|')
345
+ ... def sentiment_analysis(txt):
346
+ ... if 'vader_model' not in td_buffer:
347
+ ... from vaderSentiment.vaderSentiment import SentimentIntensityAnalyzer
348
+ ... td_buffer['vader_model'] = SentimentIntensityAnalyzer()
349
+ ... sid_obj = td_buffer['vader_model']
350
+ ...
351
+ ... sentiment_dict = sid_obj.polarity_scores(txt)
352
+ ... if sentiment_dict['compound'] >= 0.05 :
353
+ ... sentiment = "Positive"
354
+ ... elif sentiment_dict['compound'] <= - 0.05 :
355
+ ... sentiment = "Negative"
356
+ ... else :
357
+ ... sentiment = "Neutral"
358
+ ... return sentiment
359
+
360
+ # Assign the Column Expression returned by user defined function
361
+ # to the DataFrame.
362
+ >>> res = df.assign(sentiment = sentiment_analysis('review'))
363
+ >>> res = res.select(["id", "product", "sentiment"])
364
+ >>> res
365
+ id product sentiment
366
+ 0 5 gps Positive
367
+ 1 9 television Negative
368
+ 2 8 camera Negative
369
+ 3 10 camera Negative
370
+ 4 1 camera Positive
371
+ 5 4 gps Positive
372
+ 6 2 office suite Positive
373
+ 7 7 gps Negative
374
+ 8 6 gps Negative
375
+ 9 3 camera Positive
376
+ >>>
324
377
  """
325
378
 
326
379
  allowed_datatypes = TeradataTypes.TD_ALL_TYPES.value
@@ -981,6 +1034,120 @@ def td_range(start, end=None, step=1):
981
1034
  df = DataFrame.from_query(range_query)
982
1035
  return df
983
1036
 
1037
+ def current_date(time_zone='local'):
1038
+ """
1039
+ DESCRIPTION:
1040
+ Returns the current date based on the specified time zone.
1041
+
1042
+ PARAMETERS:
1043
+ time_zone:
1044
+ Optional Argument.
1045
+ Specifies the time zone to use for retrieving the current date.
1046
+ Permitted Values:
1047
+ - "local": Uses the local time zone.
1048
+ - Any valid time zone string.
1049
+ Default Value: "local"
1050
+ Types: str
1051
+
1052
+ RETURNS:
1053
+ ColumnExpression.
1054
+
1055
+ RAISES:
1056
+ None
1057
+
1058
+ EXAMPLES:
1059
+ # Example 1: Add a new column to the DataFrame that contains the
1060
+ # current date as its value. Consider system specified
1061
+ # timezone as timezone.
1062
+ >>> from teradataml.dataframe.functions import current_date
1063
+ >>> load_example_data('dataframe', 'sales')
1064
+ >>> df = DataFrame("sales")
1065
+ >>> df.assign(current_date=current_date())
1066
+ accounts Feb Jan Mar Apr datetime current_date
1067
+ Alpha Co 210.0 200.0 215 250 04/01/2017 25/05/27
1068
+ Blue Inc 90.0 50 95 101 04/01/2017 25/05/27
1069
+ Jones LLC 200.0 150 140 180 04/01/2017 25/05/27
1070
+ Orange Inc 210.0 None None 250 04/01/2017 25/05/27
1071
+ Yellow Inc 90.0 None None None 04/01/2017 25/05/27
1072
+ Red Inc 200.0 150 140 None 04/01/2017 25/05/27
1073
+
1074
+ # Example 2: Add a new column to the DataFrame that contains the
1075
+ # current date in a specific time zone as its value.
1076
+ >>> from teradataml.dataframe.functions import current_date
1077
+ >>> load_example_data('dataframe', 'sales')
1078
+ >>> df = DataFrame("sales")
1079
+ >>> df.assign(current_date=current_date("GMT"))
1080
+ accounts Feb Jan Mar Apr datetime current_date
1081
+ Alpha Co 210.0 200.0 215 250 04/01/2017 25/05/27
1082
+ Blue Inc 90.0 50 95 101 04/01/2017 25/05/27
1083
+ Jones LLC 200.0 150 140 180 04/01/2017 25/05/27
1084
+ Orange Inc 210.0 None None 250 04/01/2017 25/05/27
1085
+ Yellow Inc 90.0 None None None 04/01/2017 25/05/27
1086
+ Red Inc 200.0 150 140 None 04/01/2017 25/05/27
1087
+
1088
+ """
1089
+ if time_zone == "local":
1090
+ expr_ = "CURRENT_DATE AT LOCAL"
1091
+ else:
1092
+ expr_ = "CURRENT_DATE AT TIME ZONE '{}'".format(time_zone)
1093
+ return _SQLColumnExpression(literal_column(expr_), type = DATE())
1094
+
1095
+ def current_timestamp(time_zone='local'):
1096
+ """
1097
+ DESCRIPTION:
1098
+ Returns the current timestamp based on the specified time zone.
1099
+
1100
+ PARAMETERS:
1101
+ time_zone:
1102
+ Optional Argument.
1103
+ Specifies the time zone to use for retrieving the current timestamp.
1104
+ Permitted Values:
1105
+ - "local": Uses the local time zone.
1106
+ - Any valid time zone string.
1107
+ Default Value: "local"
1108
+ Types: str
1109
+
1110
+ RETURNS:
1111
+ ColumnExpression.
1112
+
1113
+ RAISES:
1114
+ None
1115
+
1116
+ EXAMPLES:
1117
+ # Example 1: Assign the current timestamp in the local time zone to a DataFrame column.
1118
+ >>> from teradataml.dataframe.functions import current_timestamp
1119
+ >>> load_example_data('dataframe', 'sales')
1120
+ >>> df = DataFrame("sales")
1121
+ >>> df.assign(current_timestamp = current_timestamp())
1122
+ accounts Feb Jan Mar Apr datetime current_timestamp
1123
+ Alpha Co 210.0 200 215 250 04/01/2017 2025-05-27 17:36:56.750000+00:00
1124
+ Blue Inc 90.0 50 95 101 04/01/2017 2025-05-27 17:36:56.750000+00:00
1125
+ Jones LLC 200.0 150 140 180 04/01/2017 2025-05-27 17:36:56.750000+00:00
1126
+ Orange Inc 210.0 None None 250 04/01/2017 2025-05-27 17:36:56.750000+00:00
1127
+ Yellow Inc 90.0 None None None 04/01/2017 2025-05-27 17:36:56.750000+00:00
1128
+ Red Inc 200.0 150 140 None 04/01/2017 2025-05-27 17:36:56.750000+00:00
1129
+
1130
+ # Example 2: Assign the current timestamp in a specific time zone to a DataFrame column.
1131
+ >>> from teradataml.dataframe.functions import current_timestamp
1132
+ >>> load_example_data('dataframe', 'sales')
1133
+ >>> df = DataFrame("sales")
1134
+ >>> df.assign(current_timestamp = current_timestamp("GMT+10"))
1135
+ accounts Feb Jan Mar Apr datetime current_timestamp
1136
+ Blue Inc 90.0 50 95 101 04/01/2017 2025-05-28 03:39:00.790000+10:00
1137
+ Red Inc 200.0 150 140 None 04/01/2017 2025-05-28 03:39:00.790000+10:00
1138
+ Yellow Inc 90.0 None None None 04/01/2017 2025-05-28 03:39:00.790000+10:00
1139
+ Jones LLC 200.0 150 140 180 04/01/2017 2025-05-28 03:39:00.790000+10:00
1140
+ Orange Inc 210.0 None None 250 04/01/2017 2025-05-28 03:39:00.790000+10:00
1141
+ Alpha Co 210.0 200 215 250 04/01/2017 2025-05-28 03:39:00.790000+10:00
1142
+
1143
+ """
1144
+
1145
+ if time_zone == "local":
1146
+ expr_ = "CURRENT_TIMESTAMP AT LOCAL"
1147
+ else:
1148
+ expr_ = "CURRENT_TIMESTAMP AT TIME ZONE '{}'".format(time_zone)
1149
+ return _SQLColumnExpression(literal_column(expr_), type = TIMESTAMP())
1150
+
984
1151
  def get_formatters(formatter_type = None):
985
1152
  """
986
1153
  DESCRIPTION:
@@ -19,6 +19,7 @@ from teradataml.common.utils import UtilFuncs
19
19
  from teradataml.dataframe import dataframe
20
20
  from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils
21
21
  from teradataml.common.aed_utils import AedUtils
22
+ from teradataml.dataframe.sql import _MetaExpression
22
23
  from teradataml.utils.validators import _Validators
23
24
  from teradatasqlalchemy.dialect import dialect as td_dialect, TeradataTypeCompiler as td_type_compiler
24
25
  from teradatasqlalchemy import (GEOMETRY, MBR, MBB)
@@ -346,7 +347,9 @@ def __process_operation(meta_data, is_lazy, setop_type, nodeid, index_label, ind
346
347
  break
347
348
 
348
349
  # Constructing new Metadata (_metaexpr) without DB; using dummy nodeid and get new metaexpr for nodeid.
349
- meta_data = UtilFuncs._get_metaexpr_using_columns(nodeid, column_info) if is_lazy else meta_data
350
+ meta_data = UtilFuncs._get_metaexpr_using_columns(nodeid, column_info,
351
+ datalake=meta_data.datalake if isinstance(meta_data, _MetaExpression) else None) if is_lazy \
352
+ else meta_data
350
353
 
351
354
  if is_lazy:
352
355
  return getattr(module, class_name)._from_node(nodeid, meta_data, index_label)
@@ -200,11 +200,10 @@ class _MetaExpression(object):
200
200
  RAISES:
201
201
  AttributeError if attribute can't be found
202
202
  """
203
-
204
- res = getattr(self.__t, key, None)
205
- if res is None:
203
+ try:
204
+ res = getattr(self.__t, key)
205
+ except AttributeError:
206
206
  raise AttributeError('Unable to find attribute: %s' % key)
207
-
208
207
  return res
209
208
 
210
209
  @property
@@ -501,7 +500,7 @@ class _SQLTableExpression(_PandasTableExpression):
501
500
  self.c = [_SQLColumnExpression(c) for c in table.c]
502
501
 
503
502
  self._n_rows = 0
504
-
503
+ self._datalake = kw.get('datalake', None)
505
504
 
506
505
  @property
507
506
  def c(self):
@@ -557,6 +556,13 @@ class _SQLTableExpression(_PandasTableExpression):
557
556
 
558
557
  self.__t = table
559
558
 
559
+ @property
560
+ def datalake(self):
561
+ """
562
+ Returns the underlying datalake information
563
+ """
564
+ return self._datalake
565
+
560
566
  def __repr__(self):
561
567
  """
562
568
  Returns a SELECT TOP string representing the underlying table.
@@ -10504,24 +10510,24 @@ class _SQLColumnExpression(_LogicalColumnExpression,
10504
10510
  # teradataml does not support regex grouping hence in some cases first used 'regex_replace' and
10505
10511
  # then 'regex_substr' or vice-versa.
10506
10512
  _part_to_extract_dict = {'HOST': _SQLColumnExpression(
10507
- func.regexp_replace(func.regexp_substr(self.expression, '//([^/?#]*)'), '(//[^/?#]+@)|(//)|(:\d+)', ''),
10513
+ func.regexp_replace(func.regexp_substr(self.expression, '//([^/?#]*)'), r'(//[^/?#]+@)|(//)|(:\d+)', ''),
10508
10514
  type=VARCHAR()),
10509
10515
  'PATH': _SQLColumnExpression(func.regexp_substr(
10510
10516
  func.regexp_replace(self.expression, '^(([^:/?#]+):)?(//([^/?#]*))?', ''),
10511
10517
  '([^?#]*)'), type=VARCHAR()),
10512
10518
  'QUERY': _SQLColumnExpression(func.ltrim(func.regexp_substr(
10513
10519
  func.regexp_replace(self.expression, '^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)', ''),
10514
- '\?([^#]*)'), '?'), type=VARCHAR()),
10520
+ r'\?([^#]*)'), '?'), type=VARCHAR()),
10515
10521
  'REF': _SQLColumnExpression(func.ltrim(func.regexp_substr(
10516
10522
  func.regexp_replace(self.expression,
10517
- '^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?', ''),
10523
+ r'^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?', ''),
10518
10524
  '(#(.*))'), '#'), type=VARCHAR()),
10519
10525
  'PROTOCOL': _SQLColumnExpression(
10520
10526
  func.rtrim(func.regexp_substr(self.expression, '^(([^:/?#]+):)'), ':'),
10521
10527
  type=VARCHAR()),
10522
10528
  'FILE': _SQLColumnExpression(func.regexp_substr(
10523
10529
  func.regexp_replace(self.expression, '^(([^:/?#]+):)?(//([^/?#]*))?', ''),
10524
- '([^?#]*)(\?([^#]*))?'), type=VARCHAR()),
10530
+ r'([^?#]*)(\?([^#]*))?'), type=VARCHAR()),
10525
10531
  'AUTHORITY': _SQLColumnExpression(
10526
10532
  func.ltrim(func.regexp_substr(self.expression, '//([^/?#]*)'), '//'),
10527
10533
  type=VARCHAR()),
@@ -10770,3 +10776,129 @@ class _SQLColumnExpression(_LogicalColumnExpression,
10770
10776
 
10771
10777
  """
10772
10778
  return _SQLColumnExpression(literal_column(f"TD_ISFINITE({self.compile()})"), type=INTEGER)
10779
+
10780
+ def between(self, lower, upper):
10781
+ """
10782
+ DESCRIPTION:
10783
+ Evaluates whether the column value is between the lower and upper bounds.
10784
+ The lower and upper bounds are inclusive.
10785
+
10786
+ PARAMETERS:
10787
+ lower:
10788
+ Required Argument.
10789
+ Specifies the lower bound value.
10790
+ Type: ColumnExpression or str or int or float
10791
+
10792
+ upper:
10793
+ Required Argument.
10794
+ Specifies the upper bound value.
10795
+ Type: ColumnExpression or str or int or float
10796
+
10797
+ RETURNS:
10798
+ ColumnExpression
10799
+
10800
+ EXAMPLES:
10801
+ # Load the data to run the example.
10802
+ >>> load_example_data("dataframe", "sales")
10803
+ >>> df = DataFrame("sales")
10804
+ >>> print(df)
10805
+ Feb Jan Mar Apr datetime
10806
+ accounts
10807
+ Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
10808
+ Alpha Co 210.0 200.0 215.0 250.0 04/01/2017
10809
+ Jones LLC 200.0 150.0 140.0 180.0 04/01/2017
10810
+ Yellow Inc 90.0 NaN NaN NaN 04/01/2017
10811
+ Red Inc 200.0 150.0 140.0 NaN 04/01/2017
10812
+ Orange Inc 210.0 NaN NaN 250.0 04/01/2017
10813
+
10814
+ # Example 1: Check if column 'Feb' is between 100 and 200.
10815
+ >>> new_df = df[df.Feb.between(100, 200)]
10816
+ >>> print(new_df)
10817
+ Feb Jan Mar Apr datetime
10818
+ accounts
10819
+ Jones LLC 200.0 150 140 180.0 04/01/2017
10820
+ Red Inc 200.0 150 140 NaN 04/01/2017
10821
+
10822
+ # Example 2: Check if column 'datetime' is between '01-01-2017' and '30-01-2017'.
10823
+ >>> new_df = df[df.datetime.between('01-01-2017', '30-01-2017')]
10824
+ >>> print(new_df)
10825
+ Feb Jan Mar Apr datetime
10826
+ accounts
10827
+ Jones LLC 200.0 150.0 140.0 180.0 04/01/2017
10828
+ Blue Inc 90.0 50.0 95.0 101.0 04/01/2017
10829
+ Yellow Inc 90.0 NaN NaN NaN 04/01/2017
10830
+ Red Inc 200.0 150.0 140.0 NaN 04/01/2017
10831
+ Alpha Co 210.0 200.0 215.0 250.0 04/01/2017
10832
+ Orange Inc 210.0 NaN NaN 250.0 04/01/2017
10833
+ """
10834
+ return _SQLColumnExpression(self.expression.between(lower, upper))
10835
+
10836
+ def begin(self):
10837
+ """
10838
+ DESCRIPTION:
10839
+ Retrieves the beginning date or timestamp from a PERIOD column.
10840
+
10841
+ PARAMETERS:
10842
+ None.
10843
+
10844
+ RETURNS:
10845
+ ColumnExpression.
10846
+
10847
+ RAISES:
10848
+ TeradataMlException.
10849
+
10850
+ EXAMPLES:
10851
+ # Load the data to run the example.
10852
+ >>> load_example_data("teradataml", "Employee_roles")
10853
+
10854
+ # Create a DataFrame on 'employee_roles' table.
10855
+ >>> df = DataFrame("employee_roles")
10856
+
10857
+ # Extract the starting date from the period column 'role_validity_period'
10858
+ # and assign it to a new column.
10859
+ >>> df = df.assign(start_date_col = df['role_validity_period'].begin())
10860
+ EmployeeID EmployeeName Department Salary role_validity_period start_date_col
10861
+ 1 John Doe IT 100.000 ('20/01/01', '24/12/31') 20/01/01
10862
+ 2 Jane Smith DA 200.000 ('20/01/01', '99/12/31') 20/01/01
10863
+ 3 Bob Marketing 330.000 ('25/01/01', '99/12/31') 25/01/01
10864
+ 3 Bob Sales 300.000 ('24/01/01', '24/12/31') 24/01/01
10865
+
10866
+ """
10867
+ _Validators._validate_period_column_type(self._type)
10868
+ element_type = DATE if isinstance(self._type, PERIOD_DATE) else TIMESTAMP
10869
+ return _SQLColumnExpression(literal_column(f"BEGIN({self.compile()})"), type = element_type)
10870
+
10871
+ def end(self):
10872
+ """
10873
+ DESCRIPTION:
10874
+ Retrieves the ending date or timestamp from a PERIOD column.
10875
+
10876
+ PARAMETERS:
10877
+ None.
10878
+
10879
+ RETURNS:
10880
+ ColumnExpression.
10881
+
10882
+ RAISES:
10883
+ TeradataMlException.
10884
+
10885
+ EXAMPLES:
10886
+ # Load the data to run the example.
10887
+ >>> load_example_data("teradataml", "Employee_roles")
10888
+
10889
+ # Create a DataFrame on 'employee_roles' table.
10890
+ >>> df = DataFrame("employee_roles")
10891
+
10892
+ # Extract the ending date from the period column 'role_validity_period'
10893
+ # and assign it to a new column.
10894
+ >>> df = df.assign(end_date_col = df['role_validity_period'].end())
10895
+ EmployeeID EmployeeName Department Salary role_validity_period end_date_col
10896
+ 1 John Doe IT 100.000 ('20/01/01', '24/12/31') 24/12/31
10897
+ 2 Jane Smith DA 200.000 ('20/01/01', '99/12/31') 99/12/31
10898
+ 3 Bob Marketing 330.000 ('25/01/01', '99/12/31') 99/12/31
10899
+ 3 Bob Sales 300.000 ('24/01/01', '24/12/31') 24/12/31
10900
+
10901
+ """
10902
+ _Validators._validate_period_column_type(self._type)
10903
+ element_type = DATE if isinstance(self._type, PERIOD_DATE) else TIMESTAMP
10904
+ return _SQLColumnExpression(literal_column(f"END({self.compile()})"), type = element_type)