teradataml 20.0.0.6__py3-none-any.whl → 20.0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (96)
  1. teradataml/README.md +210 -0
  2. teradataml/__init__.py +1 -1
  3. teradataml/_version.py +1 -1
  4. teradataml/analytics/analytic_function_executor.py +162 -76
  5. teradataml/analytics/byom/__init__.py +1 -1
  6. teradataml/analytics/json_parser/__init__.py +2 -0
  7. teradataml/analytics/json_parser/analytic_functions_argument.py +95 -2
  8. teradataml/analytics/json_parser/metadata.py +22 -4
  9. teradataml/analytics/sqle/DecisionTreePredict.py +3 -2
  10. teradataml/analytics/sqle/NaiveBayesPredict.py +3 -2
  11. teradataml/analytics/sqle/__init__.py +3 -0
  12. teradataml/analytics/utils.py +4 -1
  13. teradataml/automl/__init__.py +2369 -464
  14. teradataml/automl/autodataprep/__init__.py +15 -0
  15. teradataml/automl/custom_json_utils.py +184 -112
  16. teradataml/automl/data_preparation.py +113 -58
  17. teradataml/automl/data_transformation.py +154 -53
  18. teradataml/automl/feature_engineering.py +113 -53
  19. teradataml/automl/feature_exploration.py +548 -25
  20. teradataml/automl/model_evaluation.py +260 -32
  21. teradataml/automl/model_training.py +399 -206
  22. teradataml/clients/auth_client.py +2 -2
  23. teradataml/common/aed_utils.py +11 -2
  24. teradataml/common/bulk_exposed_utils.py +4 -2
  25. teradataml/common/constants.py +62 -2
  26. teradataml/common/garbagecollector.py +50 -21
  27. teradataml/common/messagecodes.py +47 -2
  28. teradataml/common/messages.py +19 -1
  29. teradataml/common/sqlbundle.py +23 -6
  30. teradataml/common/utils.py +116 -10
  31. teradataml/context/aed_context.py +16 -10
  32. teradataml/data/Employee.csv +5 -0
  33. teradataml/data/Employee_Address.csv +4 -0
  34. teradataml/data/Employee_roles.csv +5 -0
  35. teradataml/data/JulesBelvezeDummyData.csv +100 -0
  36. teradataml/data/byom_example.json +5 -0
  37. teradataml/data/creditcard_data.csv +284618 -0
  38. teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
  39. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +1 -1
  40. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +1 -1
  41. teradataml/data/docs/sqle/docs_17_20/TextParser.py +1 -1
  42. teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
  43. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +3 -7
  44. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +3 -7
  45. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +3 -7
  46. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +3 -7
  47. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +3 -7
  48. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +3 -7
  49. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +3 -7
  50. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +3 -7
  51. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +3 -7
  52. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +3 -7
  53. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +3 -7
  54. teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
  55. teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
  56. teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
  57. teradataml/data/load_example_data.py +29 -11
  58. teradataml/data/payment_fraud_dataset.csv +10001 -0
  59. teradataml/data/teradataml_example.json +67 -0
  60. teradataml/dataframe/copy_to.py +714 -54
  61. teradataml/dataframe/dataframe.py +1153 -33
  62. teradataml/dataframe/dataframe_utils.py +8 -3
  63. teradataml/dataframe/functions.py +168 -1
  64. teradataml/dataframe/setop.py +4 -1
  65. teradataml/dataframe/sql.py +141 -9
  66. teradataml/dbutils/dbutils.py +470 -35
  67. teradataml/dbutils/filemgr.py +1 -1
  68. teradataml/hyperparameter_tuner/optimizer.py +456 -142
  69. teradataml/lib/aed_0_1.dll +0 -0
  70. teradataml/lib/libaed_0_1.dylib +0 -0
  71. teradataml/lib/libaed_0_1.so +0 -0
  72. teradataml/lib/libaed_0_1_aarch64.so +0 -0
  73. teradataml/scriptmgmt/UserEnv.py +234 -34
  74. teradataml/scriptmgmt/lls_utils.py +43 -17
  75. teradataml/sdk/_json_parser.py +1 -1
  76. teradataml/sdk/api_client.py +9 -6
  77. teradataml/sdk/modelops/_client.py +3 -0
  78. teradataml/series/series.py +12 -7
  79. teradataml/store/feature_store/constants.py +601 -234
  80. teradataml/store/feature_store/feature_store.py +2886 -616
  81. teradataml/store/feature_store/mind_map.py +639 -0
  82. teradataml/store/feature_store/models.py +5831 -214
  83. teradataml/store/feature_store/utils.py +390 -0
  84. teradataml/table_operators/table_operator_util.py +1 -1
  85. teradataml/table_operators/templates/dataframe_register.template +6 -2
  86. teradataml/table_operators/templates/dataframe_udf.template +6 -2
  87. teradataml/utils/docstring.py +527 -0
  88. teradataml/utils/dtypes.py +93 -0
  89. teradataml/utils/internal_buffer.py +2 -2
  90. teradataml/utils/utils.py +41 -2
  91. teradataml/utils/validators.py +694 -17
  92. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/METADATA +213 -2
  93. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/RECORD +96 -81
  94. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/WHEEL +0 -0
  95. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/top_level.txt +0 -0
  96. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/zip-safe +0 -0
@@ -27,8 +27,10 @@ import pandas as pd
  import sqlalchemy
  from sqlalchemy import Column
  from sqlalchemy.exc import NoSuchColumnError
+ from datetime import datetime, date
  from sqlalchemy.sql import ClauseElement
  from teradatasql import OperationalError
+ from teradatasqlalchemy import types as tdtypes
  from teradatasqlalchemy.dialect import dialect as td_dialect
  from teradatasqlalchemy.dialect import preparer
  from teradatasqlalchemy.types import (BIGINT, BYTEINT, DECIMAL, FLOAT, INTEGER,
@@ -38,7 +40,7 @@ import teradataml.context.context as tdmlctx
  from teradataml import GarbageCollector, execute_sql
  from teradataml.common.bulk_exposed_utils import \
  _validate_unimplemented_function
- from teradataml.common.constants import (AEDConstants, OutputStyle,
+ from teradataml.common.constants import (AEDConstants, DataFrameTypes, OutputStyle,
  PTITableConstants, PythonTypes,
  SourceType, SQLConstants,
  SQLFunctionConstants,
@@ -239,6 +241,15 @@ class DataFrame():
  included in the DataFrame if the dictionary contains those keys. If the dictionary does not
  contain the specified keys, those columns will be added with NaN values.
  Types: str OR list of str
+
+ persist:
+ Optional Argument.
+ Specifies whether to persist the DataFrame.
+ Note:
+ * This argument is only applicable when the "data" argument is of type dict, list or
+ pandas DataFrame.
+ Default Value: False
+ Types: bool

  EXAMPLES:
  >>> from teradataml.dataframe.dataframe import DataFrame
@@ -366,15 +377,24 @@ class DataFrame():
  # This attribute stores the root DataFrame columns.
  self._root_columns = None

+ # Internal argument, when this attribute is set to True, the teradataml DataFrame locks
+ # the corresponding row(s) in the underlying table(s) while accessing the data.
+ _lock_rows = kwargs.get("_lock_rows", False)
+
  self._datalake = None
  self._database = None
  self._table = None
  self._otf = False
+ self._df_type = None
+ self._valid_time_column = None
+ self._transaction_time_column = None
+

  table_name = kwargs.get("table_name", None)
  primary_index = kwargs.get("primary_index", None)
  columns = kwargs.get("columns", None)
  types = kwargs.get("types", None)
+ persist = kwargs.get("persist", False)

  # Check if the data is an instance of in_schema or if the data is None
  # and table_name is an instance of in_schema, then assign the table_name,
@@ -441,9 +461,11 @@ class DataFrame():
  pd_data = data.copy()
  # If the columns are not of type string, then convert them to string.
  pd_data.columns = [f"col_{i}" if isinstance(i, int) else i for i in pd_data.columns]
+
  # Set the table_name to the name of the table created in the database.
  table_name = UtilFuncs._generate_temp_table_name(prefix="from_pandas",
- table_type=TeradataConstants.TERADATA_TABLE)
+ table_type=TeradataConstants.TERADATA_TABLE,
+ gc_on_quit=not(persist))

  copy_to_sql(pd_data, table_name, index=index, index_label=index_label, primary_index=primary_index,
  types=types)
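
In the hunk above, gc_on_quit=not(persist) ties the new "persist" flag to table cleanup: the staging table that copy_to_sql fills is registered with the GarbageCollector unless the caller asked to keep it. A minimal sketch of the resulting lifecycle, assuming a connected teradataml session (behavior inferred from this diff, not from separate documentation):

    import pandas as pd
    from teradataml import DataFrame

    pdf = pd.DataFrame({"id": [1, 2], "name": ["a", "b"]})

    # persist=False (default): gc_on_quit=True, so the staging table is
    # garbage-collected when the session ends.
    transient_df = DataFrame.from_pandas(pdf)

    # persist=True: gc_on_quit=False, so the backing table outlives the session.
    kept_df = DataFrame.from_pandas(pdf, persist=True)
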
@@ -491,6 +513,8 @@ class DataFrame():

  if configure.temp_object_type == TeradataConstants.TERADATA_VOLATILE_TABLE:
  __execute_params = (self._table_name, self._query, True)
+ elif configure.temp_object_type == TeradataConstants.TERADATA_VIEW:
+ __execute_params = (self._table_name, self._query, _lock_rows)

  try:
  __execute(*__execute_params)
@@ -519,6 +543,7 @@ class DataFrame():
  raise TeradataMlException(Messages.get_message(MessageCodes.TDMLDF_CREATE_FAIL),
  MessageCodes.TDMLDF_CREATE_FAIL)

+ # _get_metaexpr() can only be used if self._table_name is set.
  if table_name or query:
  self._metaexpr = self._get_metaexpr()
  self._get_metadata_from_metaexpr(self._metaexpr)
@@ -717,7 +742,7 @@ class DataFrame():

  @classmethod
  @collect_queryband(queryband="DF_fromQuery")
- def from_query(cls, query, index=True, index_label=None, materialize=False):
+ def from_query(cls, query, index=True, index_label=None, materialize=False, **kwargs):
  """
  Class method for creating a DataFrame from a query.

@@ -815,6 +840,7 @@ class DataFrame():
  df._nodeid = nodeid
  df._source_type = SourceType.TABLE.value

+
  if not reuse_metaexpr:
  # Create new _MetaExpression object using reference metaExpression
  # for newly created DataFrame.
@@ -862,7 +888,7 @@ class DataFrame():

  @classmethod
  @collect_queryband(queryband="DF_fromPandas")
- def from_pandas(cls, pandas_df, index=True, index_label=None, primary_index=None):
+ def from_pandas(cls, pandas_df, index=True, index_label=None, primary_index=None, persist=False):
  """
  DESCRIPTION:
  Creates a teradataml DataFrame from a pandas DataFrame.
@@ -892,6 +918,12 @@ class DataFrame():
  Specifies which column(s) to use as primary index for the teradataml DataFrame.
  Types: str OR list of str

+ persist:
+ Optional Argument.
+ Specifies whether to persist the DataFrame.
+ Default Value: False
+ Types: bool
+
  RETURNS:
  teradataml DataFrame

@@ -942,14 +974,15 @@ class DataFrame():
  # Validate 'pandas_df' argument, other arguments, will be validated as part of DataFrame().
  arg_type_matrix = []
  arg_type_matrix.append(["pandas_df", pandas_df, False, (pd.DataFrame,), True])
-
+ arg_type_matrix.append(["persist", persist, True, (bool), True])
+
  _Validators._validate_function_arguments(arg_type_matrix)

- return cls(pandas_df, index, index_label, primary_index=primary_index)
+ return cls(pandas_df, index, index_label, primary_index=primary_index, persist=persist)

  @classmethod
  @collect_queryband(queryband="DF_fromDict")
- def from_dict(cls, data, columns=None):
+ def from_dict(cls, data, columns=None, persist=False):
  """
  DESCRIPTION:
  Creates a DataFrame from a dictionary containing values as lists or numpy arrays.
@@ -969,6 +1002,12 @@ class DataFrame():
  Specifies the column names for the DataFrame.
  Types: str OR list of str

+ persist:
+ Optional Argument.
+ Specifies whether to persist the DataFrame.
+ Default Value: False
+ Types: bool
+
  RETURNS:
  teradataml DataFrame

@@ -1002,10 +1041,11 @@ class DataFrame():
  arg_type_matrix = []
  arg_type_matrix.append(["data", data, False, (dict), True])
  arg_type_matrix.append(["columns", columns, True, (str, list), True])
+ arg_type_matrix.append(["persist", persist, True, (bool), True])

  _Validators._validate_function_arguments(arg_type_matrix)

- return cls(data, columns=columns, index=False)
+ return cls(data, columns=columns, index=False, persist=persist)

  @classmethod
  @collect_queryband(queryband="DF_fromRecords")
@@ -1049,6 +1089,12 @@ class DataFrame():
  Specifies the number of rows to be read from the data if the data is iterator.
  Types: int

+ persist:
+ Optional Argument.
+ Specifies whether to persist the DataFrame.
+ Default Value: False
+ Types: bool
+
  RETURNS:
  teradataml DataFrame

@@ -1136,6 +1182,7 @@ class DataFrame():
  exclude = kwargs.get("exclude", None)
  coerce_float = kwargs.get("coerce_float", True)
  nrows = kwargs.get("nrows", None)
+ persist = kwargs.get("persist", False)

  arg_type_matrix = []
  dtypes = (list, tuple, dict)
@@ -1144,6 +1191,7 @@ class DataFrame():
  arg_type_matrix.append(["exclude", exclude, True, (_ListOf(str),), True])
  arg_type_matrix.append(["coerce_float", coerce_float, True, (bool, ), True])
  arg_type_matrix.append(["nrows", nrows, True, (int,), True])
+ arg_type_matrix.append(["persist", persist, True, (bool,), True])

  _Validators._validate_function_arguments(arg_type_matrix)

@@ -1152,7 +1200,7 @@ class DataFrame():

  df = pd.DataFrame.from_records(data, columns=columns, exclude=exclude,
  coerce_float=coerce_float, nrows=nrows)
- return cls(df, index=False)
+ return cls(df, index=False, persist=persist)

  def create_temp_view(self, name):
  """
@@ -1546,6 +1594,57 @@ class DataFrame():
  self._is_art = res[0][0] == 1
  return self._is_art

+
+ def _process_columns_metadata(self):
+ """
+ DESCRIPTION:
+ Processes the metadata of columns to determine their time dimension properties
+ and to check whether the database object is a view, volatile table, or ART table.
+
+ PARAMETERS:
+ None
+
+ RAISES:
+ None
+
+ RETURNS:
+ Tuple containing five boolean values:
+ - is_view: True if the database object is a view, False otherwise.
+ - is_volatile: True if the database object is a volatile table, False otherwise.
+ - is_art_table: True if the database object is an ART table, False otherwise.
+ - has_valid_time: True if any column has a valid time dimension, False otherwise.
+ - has_transaction_time: True if any column has a transaction time dimension, False otherwise.
+ EXAMPLES:
+ >>> load_example_data("teradataml", "Employee")
+ >>> df = DataFrame.from_table("Employee")
+ >>> is_view, is_volatile, is_art_table, valid_time, transaction_time = (
+ df._process_columns_metadata()
+ )
+ >>> is_view, is_volatile, is_art_table, valid_time, transaction_time
+ (False, False, False, True, True)
+
+ """
+
+ is_view = is_volatile = is_art_table = False
+
+ for col in self._metaexpr.c:
+ metadata = col.expression.info
+ time_dimension = metadata.get('time_dimension')
+ is_view = metadata.get('is_view', is_view)
+ is_volatile = metadata.get('is_volatile', is_volatile)
+ is_art_table = metadata.get('is_art_table', is_art_table)
+
+ if time_dimension == "V":
+ self._valid_time_column = col
+
+ if time_dimension == "T":
+ self._transaction_time_column = col
+
+ has_valid_time = self._valid_time_column is not None
+ has_transaction_time = self._transaction_time_column is not None
+
+ return is_view, is_volatile, is_art_table, has_valid_time, has_transaction_time
+
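
The loop in _process_columns_metadata() reads per-column flags from col.expression.info. An illustrative shape of that mapping, with keys taken from the loop above (how teradataml populates them is outside this diff):

    # Hypothetical info dict for one column of a bi-temporal view:
    col_info = {
        "time_dimension": "V",  # "V" marks the valid-time column, "T" the transaction-time column
        "is_view": True,
        "is_volatile": False,
        "is_art_table": False,
    }
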
  def _get_metadata_from_metaexpr(self, metaexpr):
  """
  Private method for setting _metaexpr and retrieving column names and types.
@@ -1598,6 +1697,7 @@ class DataFrame():
  meta = sqlalchemy.MetaData()
  db_schema = UtilFuncs._extract_db_name(self._table_name)
  db_table_name = UtilFuncs._extract_table_name(self._table_name)
+
  if not self._datalake:
  t = sqlalchemy.Table(db_table_name, meta, schema=db_schema, autoload_with=eng)
  return _MetaExpression(t)
@@ -1626,7 +1726,7 @@ class DataFrame():
  # Create a SQLAlchemy table object representing datalake table.
  t = sqlalchemy.Table(self._table, meta, schema=self._database,
  *(Column(col_name, col_type) for col_name, col_type in zip(col_names, col_types)))
- return _MetaExpression(t)
+ return _MetaExpression(t, datalake=self._datalake)

  def __getattr__(self, name):
  """
@@ -2165,6 +2265,148 @@ class DataFrame():
  td_metadata = [(column.name, repr(column.type)) for column in self._metaexpr.c]
  return MetaData(td_metadata)

+ @property
+ def df_type(self):
+ """
+ DESCRIPTION:
+ Returns the type of the DataFrame based on the underlying database object.
+ Possible teradataml DataFrame types are:
+ - VALID_TIME_VIEW: DataFrame is created on Valid-Time dimension view.
+ - TRANSACTION_TIME_VIEW: DataFrame is created on Transaction-Time dimension view.
+ - BI_TEMPORAL_VIEW: DataFrame is created on Bi-temporal view.
+ - VALID_TIME: DataFrame is created on Valid-Time dimension table.
+ - TRANSACTION_TIME: DataFrame is created on Transaction-Time dimension table.
+ - BI_TEMPORAL: DataFrame is created on Bi-temporal dimension table.
+ - VIEW: DataFrame is created on a view.
+ - REGULAR_TABLE: DataFrame is created on a table.
+ - OTF: DataFrame is created on an OTF table.
+ - ART: DataFrame is created on an ART table.
+ - VOLATILE_TABLE: DataFrame is created on a volatile table.
+ - BI_TEMPORAL_VOLATILE_TABLE: DataFrame is created on a Bi-temporal dimension volatile table.
+ - VALID_TIME_VOLATILE_TABLE: DataFrame is created on a Valid-Time dimension volatile table.
+ - TRANSACTION_TIME_VOLATILE_TABLE: DataFrame is created on a Transaction-Time dimension volatile table.
+
+ RETURNS:
+ str
+
+ RAISES:
+ None
+
+ EXAMPLES:
+ # Load the data to run the example.
+ >>> load_example_data("teradataml", "Employee_roles") # load valid time data.
+ >>> load_example_data("teradataml", "Employee_Address") # load transaction time data.
+ >>> load_example_data("teradataml", "Employee") # load bitemporal data.
+ >>> load_example_data("uaf", ["ocean_buoys2"]) # load data to create art table.
+ >>> load_example_data('dataframe', ['admissions_train']) # load data to create a regular table.
+
+ # Example 1: DataFrame created on a Valid-Time dimension table.
+ >>> df = DataFrame.from_table('Employee_roles')
+ >>> df.df_type
+ 'VALID_TIME'
+
+ # Example 2: DataFrame created on a Transaction-Time dimension table.
+ >>> df = DataFrame.from_table('Employee_Address')
+ >>> df.df_type
+ 'TRANSACTION_TIME'
+
+ # Example 3: DataFrame created on a Bi-temporal dimension table.
+ >>> df = DataFrame.from_table('Employee')
+ >>> df.df_type
+ 'BI_TEMPORAL'
+
+ # Example 4: DataFrame created on an ART table.
+ >>> data = DataFrame.from_table('ocean_buoys2')
+ >>> from teradataml import TDSeries,SInfo
+ >>> data_series_df = TDSeries(data=data,
+ ... id=["ocean_name","buoyid"],
+ ... row_index="TD_TIMECODE",
+ ... row_index_style="TIMECODE",
+ ... payload_field="jsoncol.Measure.salinity",
+ ... payload_content="REAL")
+ >>> uaf_out = SInfo(data=data_series_df, output_table_name='TSINFO_RESULTS')
+ >>> df = DataFrame.from_table('TSINFO_RESULTS')
+ >>> df.df_type
+ 'ART'
+
+ # Example 5: DataFrame created on a regular table.
+ >>> df = DataFrame.from_table('admissions_train')
+ >>> df.df_type
+ 'REGULAR_TABLE'
+
+ # Example 6: DataFrame created on a volatile table.
+ >>> df = DataFrame.from_table('admissions_train')
+ >>> df.to_sql(table_name='admissions_train_volatile', temporary=True)
+ >>> df = DataFrame.from_table('admissions_train_volatile')
+ >>> df.df_type
+ 'VOLATILE_TABLE'
+
+ # Example 7: DataFrame created on a Bi-temporal dimension view.
+ >>> execute_sql('create view Employee_view AS SEQUENCED VALIDTIME AND SEQUENCED TRANSACTIONTIME select * from Employee')
+ >>> df = DataFrame.from_table('Employee_view')
+ >>> df.df_type
+ 'BI_TEMPORAL_VIEW'
+
+ """
+
+ if self._df_type is not None:
+ return self._df_type
+
+ is_view, is_volatile, is_art_table, valid_time, transaction_time = (
+ self._process_columns_metadata()
+ )
+
+ # Check if the DataFrame is created from an OTF table
+ if self._otf:
+ self._df_type = DataFrameTypes.OTF_TABLE.value
+ return self._df_type
+
+ # Check if the DataFrame is created from an ART table
+ if is_art_table:
+ self._df_type = DataFrameTypes.ART_TABLE.value
+ return self._df_type
+
+ # Determine the type based on valid-time, transaction-time columns, and volatility
+ if valid_time and transaction_time:
+ if is_volatile:
+ self._df_type = DataFrameTypes.BI_TEMPORAL_VOLATILE_TABLE.value
+ else:
+ self._df_type = (
+ DataFrameTypes.BI_TEMPORAL_VIEW.value
+ if is_view
+ else DataFrameTypes.BI_TEMPORAL.value
+ )
+ elif valid_time:
+ if is_volatile:
+ self._df_type = DataFrameTypes.VALID_TIME_VOLATILE_TABLE.value
+ else:
+ self._df_type = (
+ DataFrameTypes.VALID_TIME_VIEW.value
+ if is_view
+ else DataFrameTypes.VALID_TIME.value
+ )
+ elif transaction_time:
+ if is_volatile:
+ self._df_type = DataFrameTypes.TRANSACTION_TIME_VOLATILE_TABLE.value
+ else:
+ self._df_type = (
+ DataFrameTypes.TRANSACTION_TIME_VIEW.value
+ if is_view
+ else DataFrameTypes.TRANSACTION_TIME.value
+ )
+ else:
+ self._df_type = (
+ DataFrameTypes.VOLATILE_TABLE.value
+ if is_volatile
+ else (
+ DataFrameTypes.VIEW.value
+ if is_view
+ else DataFrameTypes.REGULAR_TABLE.value
+ )
+ )
+
+ return self._df_type
+
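
Condensed, the decision tree above resolves in a fixed order: OTF and ART classifications short-circuit everything else; otherwise the temporal flags choose the base type and the volatile/view flags choose the variant. A standalone sketch of the same precedence (illustrative only; it returns DataFrameTypes member names, whereas the property returns their .value strings):

    # Resolution order mirroring the df_type property above.
    def resolve_df_type(is_otf, is_art, valid_time, transaction_time,
                        is_volatile, is_view):
        if is_otf:
            return "OTF_TABLE"
        if is_art:
            return "ART_TABLE"
        base = {(True, True): "BI_TEMPORAL",
                (True, False): "VALID_TIME",
                (False, True): "TRANSACTION_TIME"}.get((valid_time, transaction_time))
        if base is None:  # no temporal columns at all
            return "VOLATILE_TABLE" if is_volatile else ("VIEW" if is_view else "REGULAR_TABLE")
        if is_volatile:
            return base + "_VOLATILE_TABLE"
        return base + "_VIEW" if is_view else base
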
  @collect_queryband(queryband="DF_info")
  def info(self, verbose=True, buf=None, max_cols=None, null_counts=False):
  """
@@ -6408,7 +6650,8 @@ class DataFrame():

  new_metaexpr = UtilFuncs._get_metaexpr_using_columns(aggregate_node_id,
  zip(new_column_names,
- new_column_types))
+ new_column_types),
+ datalake=self._metaexpr.datalake)
  agg_df = self._create_dataframe_from_node \
  (aggregate_node_id, new_metaexpr, self._index_label)

@@ -6827,7 +7070,8 @@ class DataFrame():
  sel_nodeid = self._aed_utils._aed_select(self._nodeid, column_expression)

  # Constructing new Metadata (_metaexpr) without DB; using dummy select_nodeid and underlying table name.
- new_metaexpr = UtilFuncs._get_metaexpr_using_columns(sel_nodeid, col_names_types.items())
+ new_metaexpr = UtilFuncs._get_metaexpr_using_columns(sel_nodeid, col_names_types.items(),
+ datalake=self._metaexpr.datalake)
  return self._create_dataframe_from_node(sel_nodeid, new_metaexpr, self._index_label)

  except TeradataMlException:
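
The datalake= addition in the last two hunks is a recurring pattern in this file; the join, sample, and column-selection hunks below repeat it. The point is that every _MetaExpression derived from a datalake-backed DataFrame now inherits the parent's datalake, so lazily built nodes keep resolving against the same datalake. The shared shape (arguments vary per call site):

    # Pattern repeated across the hunks in this file.
    new_metaexpr = UtilFuncs._get_metaexpr_using_columns(
        node_id, column_info, datalake=self._metaexpr.datalake)
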
@@ -7777,7 +8021,8 @@ class DataFrame():

  # Step 4: Constructing new Metadata (_metaexpr) without DB; using dummy select_nodeid
  # and underlying table name.
- new_metaexpr = UtilFuncs._get_metaexpr_using_columns(join_node_id, new_metaexpr_columns_types.items())
+ new_metaexpr = UtilFuncs._get_metaexpr_using_columns(join_node_id, new_metaexpr_columns_types.items(),
+ datalake=self._metaexpr.datalake)

  # Return a new joined dataframe.
  return self._create_dataframe_from_node(join_node_id, new_metaexpr, self._index_label)
@@ -9150,7 +9395,6 @@ class DataFrame():

  return df

-
  @collect_queryband(queryband="DF_get")
  def get(self, key):
  """
@@ -9260,7 +9504,7 @@ class DataFrame():
  append:
  Optional Argument.
  Specifies whether or not to append requested columns to the existing index.
- ` When append is False, replaces existing index.
+ When append is False, replaces existing index.
  When append is True, retains both existing & currently appended index.
  Default Value: False
  Types: bool
@@ -9476,8 +9720,8 @@ class DataFrame():
  include_grouping_columns:
  Optional Argument.
  Specifies whether to include aggregations on the grouping column(s) or not.
- When set to True, the resultant DataFrame will have the aggregations on the
- columns mentioned in "columns_expr". Otherwise, resultant DataFrame will not have
+ When set to True, the resultant DataFrame will have the aggregations on the
+ columns mentioned in "columns_expr". Otherwise, resultant DataFrame will not have
  aggregations on the columns mentioned in "columns_expr".
  Default Value: False
  Types: bool
@@ -9517,7 +9761,7 @@ class DataFrame():
  26 yes 3.57 Advanced Advanced 1
  17 no 3.83 Advanced Advanced 1

- # Example 1: Find the minimum value of all valid columns by
+ # Example 1: Find the minimum value of all valid columns by
  # grouping the DataFrame with column 'masters'.
  >>> df1 = df.groupby(["masters"])
  >>> df1.min()
@@ -9538,7 +9782,7 @@ class DataFrame():

  # Example 3: Find the sum of all valid columns by grouping the DataFrame with
  # columns 'masters' and 'admitted'. Do not include grouping columns
- # in aggregate function 'sum'.
+ # in aggregate function 'sum'.
  >>> df1 = df.groupby(["masters", "admitted"], include_grouping_columns=False)
  >>> df1.sum()
  masters admitted sum_id sum_gpa
@@ -11964,7 +12208,8 @@ class DataFrame():
  column_info = ((col_name, col_type) for col_name, col_type in
  new_metaexpr_columns_types.items())
  # Get new metaexpr for sample_node_id
- new_metaexpr = UtilFuncs._get_metaexpr_using_columns(sample_node_id, column_info, is_persist=True)
+ new_metaexpr = UtilFuncs._get_metaexpr_using_columns(sample_node_id, column_info, is_persist=True,
+ datalake=self._metaexpr.datalake)

  # Make this non-lazy. Added this in order to fix https://teradata-pe.atlassian.net/browse/ELE-6368
  # Cannot use __execute_node_and_set_table_name because self points to original df.
@@ -13399,9 +13644,9 @@ class DataFrame():
  3. When ColumnExpression(s) is(are) passed to "order_columns", then the
  corresponding expression takes precedence over arguments
  "sort_ascending" and "nulls_first". Say, ColumnExpression is col1, then
- 1. col1.asc() or col.desc() is effective irrespective of "sort_ascending".
- 2. col1.nulls_first() or col.nulls_last() is effective irrespective of "nulls_first".
- 3. Any combination of above two take precedence over "sort_ascending" and "nulls_first".
+ 1. col1.asc() or col.desc() is effective irrespective of "sort_ascending".
+ 2. col1.nulls_first() or col.nulls_last() is effective irrespective of "nulls_first".
+ 3. Any combination of above two take precedence over "sort_ascending" and "nulls_first".
  Types: str OR list of Strings (str) OR ColumnExpression OR list of ColumnExpressions

  sort_ascending:
@@ -13682,7 +13927,9 @@ class DataFrame():

  col_names_types = df_utils._get_required_columns_types_from_metaexpr(self._metaexpr, column_names)
  sel_nodeid = self._aed_utils._aed_select(self._nodeid, ','.join(column_names), True)
- new_metaexpr = UtilFuncs._get_metaexpr_using_columns(sel_nodeid, col_names_types.items())
+ new_metaexpr = UtilFuncs._get_metaexpr_using_columns(sel_nodeid, col_names_types.items(),
+ datalake=self._metaexpr.datalake)
+
  return self._create_dataframe_from_node(sel_nodeid, new_metaexpr, self._index_label)

  @collect_queryband(queryband="DF_toCsv")
@@ -15880,8 +16127,8 @@ class DataFrame():
  include_grouping_columns:
  Optional Argument.
  Specifies whether to include aggregations on the grouping column(s) or not.
- When set to True, the resultant DataFrame will have the aggregations on the
- columns mentioned in "columns". Otherwise, resultant DataFrame will not have
+ When set to True, the resultant DataFrame will have the aggregations on the
+ columns mentioned in "columns". Otherwise, resultant DataFrame will not have
  aggregations on the columns mentioned in "columns".
  Default Value: False
  Types: bool
@@ -15946,7 +16193,7 @@ class DataFrame():

  # Example 3: Find the avg of all valid columns by grouping the DataFrame with
  # columns 'masters' and 'admitted'. Do not include grouping columns
- # in aggregate function 'avg'.
+ # in aggregate function 'avg'.
  >>> df1 = df.cube(["masters", "admitted"], include_grouping_columns=False).avg()
  >>> df1
  masters admitted avg_id avg_gpa
@@ -15993,8 +16240,8 @@ class DataFrame():
  include_grouping_columns:
  Optional Argument.
  Specifies whether to include aggregations on the grouping column(s) or not.
- When set to True, the resultant DataFrame will have the aggregations on the
- columns mentioned in "columns". Otherwise, resultant DataFrame will not have
+ When set to True, the resultant DataFrame will have the aggregations on the
+ columns mentioned in "columns". Otherwise, resultant DataFrame will not have
  aggregations on the columns mentioned in "columns".
  Default Value: False
  Types: bool
@@ -16039,7 +16286,7 @@ class DataFrame():
  6 yes Beginner 13 14.71 2
  7 yes Advanced 366 49.26 7
  8 no Advanced 189 34.95 9
-
+
  # Example 2: Find the avg of all valid columns by grouping the DataFrame
  # with columns 'masters' and 'admitted'. Include grouping columns
  # in aggregate function 'avg'.
@@ -16331,6 +16578,878 @@ class DataFrame():
  "Use valid timestamp or correct snapshot id listed using 'snapshots' property.".format(as_of)),
  MessageCodes.FUNC_EXECUTION_FAILED)

+ def as_of(self, **kwargs):
+ """
+ DESCRIPTION:
+ Function to get DataFrame at a specific time on a temporal table.
+ Note:
+ Function is supported only on temporal tables or temporal views.
+
+ PARAMETERS:
+ kwargs:
+ Specifies keyword arguments.
+
+ valid_time:
+ Optional Argument.
+ Specifies the valid time to retrieve data from DataFrame created on either ValidTime
+ or BiTemporal table/view.
+ Notes:
+ * Either "valid_time" or "transaction_time" must be provided.
+ * Argument accepts below values:
+ * "current" - to get the current valid time data.
+ * any string other than "current" is considered as a date and data will be retrieved as of that point of time.
+ * date object - to get the data valid on that date.
+ * datetime object - to get the data valid at that point of time.
+ * tuple - to get the data which is valid between the two valid times.
+ * tuple should have only two elements. First element is considered as starting time
+ and second element is considered as end time for a period of time.
+ Records will be retrieved which are valid between the two valid times.
+ * Both elements can be of date or datetime or string type. If you are using
+ string, make sure the string represents a valid date.
+ * Any element can be None.
+ * If first element is None and valid time dimension column is PERIOD_DATE type,
+ then it is considered as '0001-01-01'.
+ * If first element is None and valid time dimension column is PERIOD_TIMESTAMP type,
+ then it is considered as '0001-01-01 00:00:00.000000+00:00'.
+ * If second element is None and valid time dimension column is PERIOD_DATE type,
+ then it is considered as '9999-12-31'.
+ * If second element is None and valid time dimension column is PERIOD_TIMESTAMP type,
+ then it is considered as '9999-12-31 23:59:59.999999+00:00'.
+ * None - to consider the DataFrame as regular DataFrame and retrieve all the records from
+ valid time dimension.
+ Types: date or str or tuple or NoneType
+
+ include_valid_time_column:
+ Optional Argument.
+ Specifies whether to include the valid time dimension column in the resultant DataFrame.
+ When set to True, valid time dimension column is included in resultant DataFrame.
+ Otherwise, valid time dimension column is not included in resultant DataFrame.
+ Note:
+ Ignored when "valid_time" is either tuple or None.
+ Default Value: False
+ Types: bool
+
+ transaction_time:
+ Optional Argument.
+ Specifies the transaction time to retrieve data from DataFrame created on either
+ TransactionTime or BiTemporal table/view.
+ Notes:
+ * Either "valid_time" or "transaction_time" must be provided.
+ * Argument accepts below values.
+ * "current" - to get the records which are valid at current time.
+ * any string other than "current" is considered as a timestamp and retrieves the records which are
+ valid at that point of time.
+ * datetime object - to get the records which are valid at that point of time.
+ * None - to consider the DataFrame as regular DataFrame and retrieve all the records
+ from transaction time dimension.
+ Types: datetime or str or NoneType
+
+ include_transaction_time_column:
+ Optional Argument.
+ Specifies whether to include the transaction time dimension column in the resultant DataFrame.
+ When set to True, transaction time dimension column is included in resultant DataFrame.
+ Otherwise, transaction time dimension column is not included in resultant DataFrame.
+ Default Value: False
+ Types: bool
+
+ additional_period:
+ Optional Argument.
+ Specifies the additional period to be kept in resultant DataFrame.
+ Note:
+ This is applicable only when "valid_time" is None.
+ Types: tuple of date or str
+
+ RETURNS:
+ teradataml DataFrame
+
+ RAISES:
+ TeradataMlException.
+
+ EXAMPLES:
+ # Load the data to run the example.
+ >>> load_example_data("teradataml", "Employee_roles") # load valid time data.
+ >>> load_example_data("teradataml", "Employee_Address") # load transaction time data.
+ >>> load_example_data("teradataml", "Employee") # load bitemporal data.
+
+ >>> df1 = DataFrame("Employee_roles")
+ EmployeeName Department Salary role_validity_period
+ EmployeeID
+ 1 John Doe IT 100.0 ('20/01/01', '24/12/31')
+ 2 Jane Smith DA 200.0 ('20/01/01', '99/12/31')
+ 3 Bob Marketing 330.0 ('25/01/01', '99/12/31')
+ 3 Bob Sales 300.0 ('24/01/01', '24/12/31')
+
+ # Example 1: Get the employee roles from DataFrame df1 which are valid at current time.
+ >>> df1.as_of(valid_time="current")
+ EmployeeName Department Salary
+ EmployeeID
+ 2 Jane Smith DA 200.0
+ 3 Bob Marketing 330.0
+
+ # Example 2: Get the employee roles from DataFrame df1 which are valid at current time.
+ # Also include valid time dimension column.
+ >>> df1.as_of(valid_time="current", include_valid_time_column=True)
+ EmployeeName Department Salary role_validity_period
+ EmployeeID
+ 2 Jane Smith DA 200.0 ('20/01/01', '99/12/31')
+ 3 Bob Marketing 330.0 ('25/01/01', '99/12/31')
+
+ # Example 3: Get the employee roles from DataFrame df1 which are valid at 31st Dec 2026.
+ # Include valid time dimension column.
+ >>> df1.as_of(valid_time="2026-12-31", include_valid_time_column=True)
+ EmployeeName Department Salary role_validity_period
+ EmployeeID
+ 2 Jane Smith DA 200.0 ('20/01/01', '99/12/31')
+ 3 Bob Marketing 330.0 ('25/01/01', '99/12/31')
+
+ # Example 4: Get the employee roles from DataFrame df1 which are valid at 31st Dec 2026.
+ # Also include valid time dimension column. Use date object instead of string
+ # to specify the date.
+ >>> from datetime import date
+ >>> d = date(2026, 12, 31)
+ >>> df1.as_of(valid_time=d, include_valid_time_column=True)
+ EmployeeName Department Salary role_validity_period
+ EmployeeID
+ 2 Jane Smith DA 200.0 ('20/01/01', '99/12/31')
+ 3 Bob Marketing 330.0 ('25/01/01', '99/12/31')
+
+ # Example 5: Get the employee roles which are valid between 20th Jan 2018 and 5th March 2024.
+ # Include valid time dimension column.
+ >>> df1.as_of(valid_time=("2018-01-20", "2024-03-05"), include_valid_time_column=True)
+ EmployeeName Department Salary VALIDTIME
+ EmployeeID
+ 2 Jane Smith DA 200.0 ('20/01/01', '24/03/05')
+ 1 John Doe IT 100.0 ('20/01/01', '24/03/05')
+ 3 Bob Sales 300.0 ('24/01/01', '24/03/05')
+
+ # Example 6: Get the employee roles which are valid between 20th Jan 2018 and 5th March 2024.
+ # Then again get the records which are valid at 1st Jan 2023. Do not include
+ # valid time dimension column since selecting valid time dimension column is ignored
+ # when "valid_time" is a tuple.
+ >>> df1.as_of(valid_time=(date(2018, 1, 20), "2024-03-05")).as_of(valid_time=date(2023, 1, 1))
+ EmployeeName Department Salary
+ EmployeeID
+ 2 Jane Smith DA 200.0
+ 1 John Doe IT 100.0
+
+ # Example 7: Get the employee roles which are valid between 1st Jan 0001 and 5th March 2024.
+ >>> df1.as_of(valid_time=(None, date(2024, 3, 5)))
+ EmployeeName Department Salary VALIDTIME
+ EmployeeID
+ 2 Jane Smith DA 200.0 ('20/01/01', '24/03/05')
+ 1 John Doe IT 100.0 ('20/01/01', '24/03/05')
+ 3 Bob Sales 300.0 ('24/01/01', '24/03/05')
+
+ # Example 8: Get the employee roles which are valid between 1st Jun 2024 and 31st Dec 9999.
+ >>> df1.as_of(valid_time=("2024-06-01", None))
+ EmployeeName Department Salary VALIDTIME
+ EmployeeID
+ 1 John Doe IT 100.0 ('24/06/01', '24/12/31')
+ 2 Jane Smith DA 200.0 ('24/06/01', '99/12/31')
+ 3 Bob Marketing 330.0 ('25/01/01', '99/12/31')
+ 3 Bob Sales 300.0 ('24/06/01', '24/12/31')
+
+ # Example 9: Consider df1 as regular DataFrame and retrieve all the records irrespective of
+ # whether records are valid or not.
+ >>> df1.as_of(valid_time=None)
+ EmployeeName Department Salary
+ EmployeeID
+ 1 John Doe IT 100.0
+ 2 Jane Smith DA 200.0
+ 3 Bob Marketing 330.0
+ 3 Bob Sales 300.0
+
+ # Example 10: Consider df1 as regular DataFrame and retrieve all the records irrespective of
+ # whether records are valid or not. Also include additional period and valid time
+ # dimension column.
+ >>> df1.as_of(valid_time=None, additional_period=("2024-01-01", "2024-03-05"), include_valid_time_column=True)
+ EmployeeName Department Salary role_validity_period VALIDTIME
+ EmployeeID
+ 1 John Doe IT 100.0 ('20/01/01', '24/12/31') ('24/01/01', '24/03/05')
+ 2 Jane Smith DA 200.0 ('20/01/01', '99/12/31') ('24/01/01', '24/03/05')
+ 3 Bob Marketing 330.0 ('25/01/01', '99/12/31') ('24/01/01', '24/03/05')
+ 3 Bob Sales 300.0 ('24/01/01', '24/12/31') ('24/01/01', '24/03/05')
+
+ >>> df2 = DataFrame("Employee_Address")
+ EmployeeName address validity_period
+ EmployeeID
+ 2 Jane Smith 456 Elm St ('2025-03-04 15:41:44.610000+00:00', '9999-12-31 23:59:59.999999+00:00')
+ 1 John Doe 123 Main St ('2025-03-04 15:41:44.610000+00:00', '9999-12-31 23:59:59.999999+00:00')
+ 3 Bob Johnson 789 Oak St ('2025-03-04 15:41:44.610001+00:00', '9999-12-31 23:59:59.999999+00:00')
+
+ # Example 11: Consider df2 as regular DataFrame and retrieve all the records including historic
+ # records. Also include transaction time dimension column.
+ >>> df2.as_of(transaction_time=None, include_transaction_time_column=True)
+ EmployeeName address validity_period
+ EmployeeID
+ 1 John Doe 123 Main St ('2025-03-04 15:41:44.610000+00:00', '9999-12-31 23:59:59.999999+00:00')
+ 2 Jane Smith 456 Elm St ('2025-03-04 15:41:44.610000+00:00', '9999-12-31 23:59:59.999999+00:00')
+ 3 Bob Johnson 789 Oak Street ('2025-03-04 15:41:44.610000+00:00', '2025-03-04 15:41:44.610001+00:00')
+ 3 Bob Johnson 789 Oak St ('2025-03-04 15:41:44.610001+00:00', '9999-12-31 23:59:59.999999+00:00')
+
+ # Example 12: Get the employee addresses which are valid at current time from DataFrame df2.
+ # Also include transaction time dimension column.
+ >>> df2.as_of(transaction_time="current", include_transaction_time_column=True)
+ EmployeeName address validity_period
+ EmployeeID
+ 2 Jane Smith 456 Elm St ('2025-03-04 15:41:44.610000+00:00', '9999-12-31 23:59:59.999999+00:00')
+ 1 John Doe 123 Main St ('2025-03-04 15:41:44.610000+00:00', '9999-12-31 23:59:59.999999+00:00')
+ 3 Bob Johnson 789 Oak St ('2025-03-04 15:41:44.610001+00:00', '9999-12-31 23:59:59.999999+00:00')
+
+ # Example 13: Get the employee addresses which are valid at current time from DataFrame df2.
+ # Do not include transaction time dimension column.
+ >>> df2.as_of(transaction_time="current", include_transaction_time_column=False)
+ EmployeeName address
+ EmployeeID
+ 2 Jane Smith 456 Elm St
+ 1 John Doe 123 Main St
+ 3 Bob Johnson 789 Oak St
+
+ # Example 14: Get the employee addresses which are valid at 2025-03-04 15:41:44.610000+00:00 from DataFrame df2.
+ # Include transaction time dimension column.
+ >>> df2.as_of(transaction_time="2025-03-04 15:41:44.610000+00:00", include_transaction_time_column=True)
+ EmployeeName address validity_period
+ EmployeeID
+ 2 Jane Smith 456 Elm St ('2025-03-04 15:41:44.610000+00:00', '9999-12-31 23:59:59.999999+00:00')
+ 1 John Doe 123 Main St ('2025-03-04 15:41:44.610000+00:00', '9999-12-31 23:59:59.999999+00:00')
+ 3 Bob Johnson 789 Oak Street ('2025-03-04 15:41:44.610000+00:00', '2025-03-04 15:41:44.610001+00:00')
+
+ # Example 15: Get the employee addresses which are valid at 2025-03-04 15:41:44.610001+00:00 from DataFrame df2.
+ # Include transaction time dimension column.
+ >>> from datetime import datetime, timezone, timedelta
+ >>> dt = datetime(2025, 3, 4, 15, 41, 44, 610001)
+ >>> dt_with_tz = dt.replace(tzinfo=timezone(timedelta(hours=0)))
+ >>> df2.as_of(transaction_time=dt_with_tz, include_transaction_time_column=True)
+ EmployeeName address validity_period
+ EmployeeID
+ 2 Jane Smith 456 Elm St ('2025-03-04 15:41:44.610000+00:00', '9999-12-31 23:59:59.999999+00:00')
+ 1 John Doe 123 Main St ('2025-03-04 15:41:44.610000+00:00', '9999-12-31 23:59:59.999999+00:00')
+ 3 Bob Johnson 789 Oak St ('2025-03-04 15:41:44.610001+00:00', '9999-12-31 23:59:59.999999+00:00')
+
+ >>> df3 = DataFrame("Employee")
+ EmployeeName address Department Salary role_validity validity_period
+ EmployeeID
+ 1 John Doe 123 Main St IT 100.0 ('20/01/01', '24/12/31') ('2025-03-04 18:08:58.720000+00:00', '9999-12-31 23:59:59.999999+00:00')
+ 2 Jane Smith 456 Elm St DA 200.0 ('20/01/01', '99/12/31') ('2025-03-04 18:08:58.720000+00:00', '9999-12-31 23:59:59.999999+00:00')
+ 3 Bob 789 OAK St Marketing 330.0 ('25/01/01', '99/12/31') ('2025-05-06 11:39:25.580000+00:00', '9999-12-31 23:59:59.999999+00:00')
+ 3 Bob 789 Oak St Sales 300.0 ('24/01/01', '24/12/31') ('2025-03-04 18:09:08.830000+00:00', '9999-12-31 23:59:59.999999+00:00')
+
+ # Example 16: Get all the records from DataFrame df3 by considering the DataFrame as
+ # regular DataFrame. Include both valid time and transaction time dimension columns.
+ >>> df3.as_of(valid_time=None,
+ ... transaction_time=None,
+ ... include_valid_time_column=True,
+ ... include_transaction_time_column=True
+ ... )
+ EmployeeName address Department Salary role_validity validity_period
+ EmployeeID
+ 3 Bob 789 Oak Street Sales 300.0 ('24/01/01', '24/12/31') ('2025-03-04 18:08:58.720000+00:00', '2025-03-04 18:09:08.830000+00:00')
+ 3 Bob 789 Oak St Marketing 330.0 ('25/01/01', '99/12/31') ('2025-03-04 18:09:08.830000+00:00', '2025-05-06 11:39:25.580000+00:00')
+ 1 John Doe 123 Main St IT 100.0 ('20/01/01', '24/12/31') ('2025-03-04 18:08:58.720000+00:00', '9999-12-31 23:59:59.999999+00:00')
+ 2 Jane Smith 456 Elm St DA 200.0 ('20/01/01', '99/12/31') ('2025-03-04 18:08:58.720000+00:00', '9999-12-31 23:59:59.999999+00:00')
+ 3 Bob 789 Oak Street Marketing 330.0 ('25/01/01', '99/12/31') ('2025-03-04 18:08:58.720000+00:00', '2025-03-04 18:09:08.830000+00:00')
+ 3 Bob 789 OAK St Marketing 330.0 ('25/01/01', '99/12/31') ('2025-05-06 11:39:25.580000+00:00', '9999-12-31 23:59:59.999999+00:00')
+ 3 Bob 789 Oak St Sales 300.0 ('24/01/01', '24/12/31') ('2025-03-04 18:09:08.830000+00:00', '9999-12-31 23:59:59.999999+00:00')
+
+ # Example 17: Get the employee addresses from DataFrame df3 which are valid at 1st Jun 2024 from
+ # valid time dimension and valid at '2025-03-04 18:09:08.720001+00:00' from transaction
+ # time dimension. Include both valid time and transaction time dimension columns.
+ >>> df3.as_of(valid_time="2024-06-01",
+ ... transaction_time="2025-03-04 18:09:08.720001+00:00",
+ ... include_valid_time_column=True,
+ ... include_transaction_time_column=True
+ ... )
+ EmployeeName address Department Salary role_validity validity_period
+ EmployeeID
+ 2 Jane Smith 456 Elm St DA 200.0 ('20/01/01', '99/12/31') ('2025-03-04 18:08:58.720000+00:00', '9999-12-31 23:59:59.999999+00:00')
+ 1 John Doe 123 Main St IT 100.0 ('20/01/01', '24/12/31') ('2025-03-04 18:08:58.720000+00:00', '9999-12-31 23:59:59.999999+00:00')
+ 3 Bob 789 Oak Street Sales 300.0 ('24/01/01', '24/12/31') ('2025-03-04 18:08:58.720000+00:00', '2025-03-04 18:09:08.830000+00:00')
+
+ # Example 18: Get the employee addresses from DataFrame df3 which are valid at 25th Jan 2024
+ # from valid time dimension and valid at current time from transaction time dimension.
+ # Include only transaction time dimension column.
+ >>> df3.as_of(valid_time=date(2024, 1, 25),
+ ... transaction_time="current",
+ ... include_transaction_time_column=True)
+ EmployeeName address Department Salary validity_period
+ EmployeeID
+ 2 Jane Smith 456 Elm St DA 200.0 ('2025-03-04 18:08:58.720000+00:00', '9999-12-31 23:59:59.999999+00:00')
+ 1 John Doe 123 Main St IT 100.0 ('2025-03-04 18:08:58.720000+00:00', '9999-12-31 23:59:59.999999+00:00')
+ 3 Bob 789 Oak St Sales 300.0 ('2025-03-04 18:09:08.830000+00:00', '9999-12-31 23:59:59.999999+00:00')
+
+ # Example 19: Get the employee addresses from DataFrame df3 which are valid between 1st Jan 2025
+ # and 30th June 2025 from valid time dimension and valid at
+ # '2025-03-04 18:08:59.720000+00:00' from transaction time dimension.
+ # Include both valid time and transaction time dimension columns.
+ >>> from datetime import datetime, timezone
+ >>> df3.as_of(valid_time=("2025-01-01", date(2025, 6, 30)),
+ ... transaction_time=datetime(2025, 3, 4, 18, 8, 59, 720000).astimezone(timezone.utc),
+ ... include_transaction_time_column=True)
+ EmployeeName address Department Salary validity_period VALIDTIME
+ EmployeeID
+ 2 Jane Smith 456 Elm St DA 200.0 ('2025-03-04 18:08:58.720000+00:00', '9999-12-31 23:59:59.999999+00:00') ('25/01/01', '25/06/30')
+ 3 Bob 789 Oak St Marketing 330.0 ('2025-03-04 18:09:08.830000+00:00', '2025-05-06 11:39:25.580000+00:00') ('25/01/01', '25/06/30')
+
+ # Example 20: Get the employee addresses from DataFrame df3 by considering the DataFrame as regular
+ # DataFrame from valid time dimension and valid at current time from transaction time dimension.
+ # Add additional period and include both valid time and transaction time dimension columns.
+ >>> df3.as_of(valid_time=None,
+ ... transaction_time="current",
+ ... additional_period=("2024-01-01", "2024-03-05"),
+ ... include_valid_time_column=True,
+ ... include_transaction_time_column=True
+ ... )
+ EmployeeName address Department Salary role_validity validity_period VALIDTIME
+ EmployeeID
+ 1 John Doe 123 Main St IT 100.0 ('20/01/01', '24/12/31') ('2025-03-04 18:08:58.720000+00:00', '9999-12-31 23:59:59.999999+00:00') ('24/01/01', '24/03/05')
+ 2 Jane Smith 456 Elm St DA 200.0 ('20/01/01', '99/12/31') ('2025-03-04 18:08:58.720000+00:00', '9999-12-31 23:59:59.999999+00:00') ('24/01/01', '24/03/05')
+ 3 Bob 789 OAK St Marketing 330.0 ('25/01/01', '99/12/31') ('2025-05-06 11:39:25.580000+00:00', '9999-12-31 23:59:59.999999+00:00') ('24/01/01', '24/03/05')
+ 3 Bob 789 Oak St Sales 300.0 ('24/01/01', '24/12/31') ('2025-03-04 18:09:08.830000+00:00', '9999-12-31 23:59:59.999999+00:00') ('24/01/01', '24/03/05')
+ """
16909
+
16910
+ if "valid_time" not in kwargs and "transaction_time" not in kwargs:
16911
+ _Validators._validate_mutually_exclusive_arguments(
16912
+ None, "valid_time", None, "transaction_time")
16913
+
16914
+ # Validate argument types.
16915
+ _validation = []
16916
+ _validation.append(["valid_time", kwargs.get("valid_time"), True, (date, datetime, str, tuple, type(None))])
16917
+ _validation.append(["transaction_time", kwargs.get("transaction_time"), True, (datetime, str, type(None))])
16918
+ _validation.append(["additional_period", kwargs.get("additional_period"), True, (tuple, type(None))])
16919
+ _validation.append(["include_valid_time_column", kwargs.get("include_valid_time_column"), True, bool])
16920
+ _validation.append(["include_transaction_time_column", kwargs.get("include_transaction_time_column"), True, bool])
16921
+
16922
+ # Validate argument types
16923
+ _Validators._validate_function_arguments(_validation)
16924
+
16925
+ # Validate temporal table type.
16926
+ _Validators._validate_temporal_table_type(self.df_type)
16927
+
16928
+ # Extract valid_time and transaction_time from kwargs.
16929
+ valid_time = kwargs.get("valid_time")
16930
+ transaction_time = kwargs.get("transaction_time")
16931
+ additional_period = kwargs.get("additional_period")
16932
+        include_valid_time_column = kwargs.get("include_valid_time_column")
+        include_transaction_time_column = kwargs.get("include_transaction_time_column")
+
+        # Validate if the user specified valid_time for a transaction-time table.
+        if "valid_time" in kwargs:
+            _Validators._validate_as_of_arguments(df_type=self.df_type)
+
+        # Validate if the user specified transaction_time for a valid-time table.
+        if "transaction_time" in kwargs:
+            _Validators._validate_as_of_arguments(df_type=self.df_type, argument_name='transaction_time')
+
+        add_vt_period = False
+
+        # Generate the time qualifier clause.
+        if "valid_time" in kwargs and "transaction_time" not in kwargs:
+            clause = self.__get_valid_time_clause(valid_time, additional_period)
+        elif "transaction_time" in kwargs and "valid_time" not in kwargs:
+            clause = self.__get_transaction_time_clause(transaction_time)
+        else:
+            # Generate both clauses.
+            clause = "{} AND {}".format(self.__get_valid_time_clause(valid_time, additional_period),
+                                        self.__get_transaction_time_clause(transaction_time))
+
+        # Exclude the time dimension columns if the user does not want them in the output DataFrame.
+        columns_to_exclude = []
+        if not include_valid_time_column and self._valid_time_column:
+            columns_to_exclude.append(self._valid_time_column.name)
+
+        if not include_transaction_time_column and self._transaction_time_column:
+            columns_to_exclude.append(self._transaction_time_column.name)
+
+        columns = [col for col in self.columns if col not in columns_to_exclude]
+        col_names_types = df_utils._get_required_columns_types_from_metaexpr(self._metaexpr, columns)
+
+        # Notes:
+        # * If valid_time is a tuple, i.e., for the valid-time qualifier SEQUENCED VALIDTIME,
+        #   add the additional column VALIDTIME. This column should not be present in the SELECT
+        #   statement. The ValidTime dimension column should not be present in the SELECT
+        #   statement either; the VALIDTIME column acts as the ValidTime dimension column here.
+        # * The time qualifier NONSEQUENCED VALIDTIME PERIOD clause also produces the additional
+        #   column VALIDTIME. Hence, the VALIDTIME column is also returned in the output
+        #   DataFrame. However, the valid-time column can still exist in the SELECT statement.
+        if isinstance(valid_time, tuple):
+            add_vt_period = True
+            columns = [col for col in columns if col != self._valid_time_column.name]
+            col_names_types = df_utils._get_required_columns_types_from_metaexpr(self._metaexpr, columns)
+            col_names_types["VALIDTIME"] = self._valid_time_column.type
+        elif valid_time is None and additional_period is not None:
+            col_names_types = df_utils._get_required_columns_types_from_metaexpr(self._metaexpr, columns)
+            col_names_types["VALIDTIME"] = self._valid_time_column.type
+
+        # SELECT node.
+        column_expression = ", ".join(columns)
+        sel_nodeid = self._aed_utils._aed_select(self._nodeid, column_expression, timestamp_expr=clause)
+
+        # Construct the new metadata (_metaexpr) without hitting the DB, using the select
+        # node id as the dummy underlying table name.
+        new_metaexpr = UtilFuncs._get_metaexpr_using_columns(sel_nodeid, col_names_types.items())
+        df = self._create_dataframe_from_node(sel_nodeid, new_metaexpr, self._index_label)
+
+        # If the time qualifier is SEQUENCED VALIDTIME PERIOD, add the VALIDTIME column to the
+        # DataFrame, since it produces a temporal dataset.
+        if add_vt_period:
+            df._valid_time_column = df['VALIDTIME']
+
+        return df
+
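In the bi-temporal branch above, the two qualifiers are simply AND-ed into one clause; for instance, with valid_time="current" and transaction_time="current", the generated "clause" string would read (per the format call above):

    CURRENT VALIDTIME AND CURRENT TRANSACTIONTIME
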
+    def __get_valid_time_clause(self, valid_time, additional_period=None):
+        """
+        DESCRIPTION:
+            Function to get the valid-time clause for a temporal table.
+
+        PARAMETERS:
+            valid_time:
+                Required Argument.
+                Specifies the valid-time dimension to represent temporal data when creating the DataFrame.
+                Types: date or str
+
+            additional_period:
+                Optional Argument.
+                Specifies the additional period to be kept in the DataFrame.
+                Note:
+                    This is applicable only when "valid_time" is None.
+                Types: tuple of date or str
+
+        RETURNS:
+            str
+
+        RAISES:
+            None.
+        """
+        is_vt_dt_type = isinstance(self._valid_time_column.type, tdtypes.PERIOD_DATE)
+        if valid_time == "current":
+            return "CURRENT VALIDTIME"
+
+        if isinstance(valid_time, (str, date, datetime)):
+            # If valid_time is a string, date, or datetime, check the type of the temporal
+            # column, since the ValidTime dimension allows both DATE and TIMESTAMP types
+            # for its columns.
+            if is_vt_dt_type:
+                return "VALIDTIME AS OF DATE '{}'".format(valid_time)
+            return "VALIDTIME AS OF TIMESTAMP '{}'".format(valid_time)
+
+        # If valid_time is a tuple, it represents a period. The user can specify the start
+        # and/or end time; derive any missing value.
+        if isinstance(valid_time, tuple):
+            start = valid_time[0]
+            end = valid_time[1]
+            start = ("0001-01-01" if is_vt_dt_type else '0001-01-01 00:00:00.000000+00:00') if start is None else str(start)
+            end = ("9999-12-31" if is_vt_dt_type else '9999-12-31 23:59:59.999999+00:00') if end is None else str(end)
+            return "SEQUENCED VALIDTIME PERIOD '({}, {})'".format(start, end)
+
+        if valid_time is None and additional_period is not None:
+            return "NONSEQUENCED VALIDTIME PERIOD '({}, {})'".format(additional_period[0], additional_period[1])
+
+        return "NONSEQUENCED VALIDTIME"
+
+    def __get_transaction_time_clause(self, transaction_time):
+        """
+        DESCRIPTION:
+            Function to get the transaction-time clause for a temporal table.
+
+        PARAMETERS:
+            transaction_time:
+                Required Argument.
+                Specifies the transaction-time dimension to represent temporal data when creating the DataFrame.
+                Types: date or str
+
+        RETURNS:
+            str
+
+        RAISES:
+            None.
+        """
+        if transaction_time == "current":
+            return "CURRENT TRANSACTIONTIME"
+
+        if transaction_time is None:
+            return "NONSEQUENCED TRANSACTIONTIME"
+
+        return "TRANSACTIONTIME AS OF TIMESTAMP '{}'".format(transaction_time)
+
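For a quick sense of the argument-to-qualifier mapping the two private helpers above implement, here is a minimal standalone sketch, assuming a PERIOD(DATE) ValidTime column (the function name and the simplification to str-only inputs are illustrative, not part of the package API):

    def valid_time_clause(valid_time=None, additional_period=None):
        # Mirrors the __get_valid_time_clause() rules for the PERIOD(DATE) case.
        if valid_time == "current":
            return "CURRENT VALIDTIME"
        if isinstance(valid_time, str):
            return "VALIDTIME AS OF DATE '{}'".format(valid_time)
        if isinstance(valid_time, tuple):
            # Open-ended bounds default to the era limits.
            start = "0001-01-01" if valid_time[0] is None else str(valid_time[0])
            end = "9999-12-31" if valid_time[1] is None else str(valid_time[1])
            return "SEQUENCED VALIDTIME PERIOD '({}, {})'".format(start, end)
        if valid_time is None and additional_period is not None:
            return "NONSEQUENCED VALIDTIME PERIOD '({}, {})'".format(*additional_period)
        return "NONSEQUENCED VALIDTIME"

    print(valid_time_clause("current"))             # CURRENT VALIDTIME
    print(valid_time_clause(("2024-01-01", None)))  # SEQUENCED VALIDTIME PERIOD '(2024-01-01, 9999-12-31)'
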
+    def _generate_temporal_dataframe(self, timestamp_expr, time_column):
+        """
+        DESCRIPTION:
+            Helper method to generate a temporal DataFrame based on the given timestamp expression.
+
+        PARAMETERS:
+            timestamp_expr:
+                Required Argument.
+                Specifies the timestamp expression to filter the temporal data.
+                Types: str
+
+            time_column:
+                Required Argument.
+                Specifies the temporal column (valid-time or transaction-time) to process.
+                Types: ColumnExpression
+
+        RAISES:
+            None.
+
+        RETURNS:
+            teradataml DataFrame
+        """
+        col_expr = "{} as {}".format(time_column.cast(time_column.type).compile(), time_column.name)
+        cols = [col.name if col.name != time_column.name else col_expr for col in self._metaexpr.c]
+        column_expression = ", ".join(cols)
+        sel_node_id = self._aed_utils._aed_select(self._nodeid, column_expression, timestamp_expr=timestamp_expr)
+        return self._create_dataframe_from_node(sel_node_id, self._metaexpr, self._index_label)
+
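With the NONSEQUENCED qualifiers used by the row-state helpers below, this amounts to SQL of roughly the following shape (an illustrative sketch, not the literal statement, which is assembled through the AED select node; table and column names are borrowed from the docstring examples that follow):

    NONSEQUENCED VALIDTIME
    SELECT EmployeeID, EmployeeName, Department, Salary,
           CAST(role_validity_period AS PERIOD(DATE)) AS role_validity_period
    FROM Employee_roles;
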
+    def historic_rows(self):
+        """
+        DESCRIPTION:
+            Retrieves historical rows from a DataFrame created on a valid-time
+            or bi-temporal table/view. Historical rows are defined as those where the
+            end of the valid-time period precedes the current time.
+
+        PARAMETERS:
+            None.
+
+        RETURNS:
+            teradataml DataFrame.
+
+        RAISES:
+            TeradataMlException.
+
+        EXAMPLES:
+            # Load the data to run the example.
+            >>> load_example_data("teradataml", "Employee_roles")
+
+            # Create a DataFrame on 'Employee_roles' table.
+            >>> df = DataFrame("Employee_roles")
+
+            # Retrieve historical rows from the DataFrame.
+            >>> df.historic_rows()
+               EmployeeID EmployeeName Department  Salary      role_validity_period
+                        1     John Doe         IT   100.0  ('20/01/01', '24/12/31')
+                        3          Bob      Sales   300.0  ('24/01/01', '24/12/31')
+        """
+        from teradataml.dataframe.functions import current_date, current_timestamp
+        # Validate the temporal table type.
+        _Validators._validate_temporal_table_type(self.df_type)
+        valid_time_col = self._valid_time_column
+        df = self._generate_temporal_dataframe("NONSEQUENCED VALIDTIME", valid_time_col)
+        # Check the type of the ValidTime dimension column.
+        if isinstance(valid_time_col.type, tdtypes.PERIOD_DATE):
+            # Filter records where the end of the ValidTime period is less than the current date.
+            return df[valid_time_col.end() < current_date()]
+        return df[valid_time_col.end() < current_timestamp()]
+
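Semantically, historic_rows() on the example table reduces to roughly this SQL (illustrative only; END() extracts the ending bound of the period column):

    NONSEQUENCED VALIDTIME
    SELECT *
    FROM Employee_roles
    WHERE END(role_validity_period) < CURRENT_DATE;
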
+    def future_rows(self):
+        """
+        DESCRIPTION:
+            Retrieves future rows from a DataFrame created on a valid-time
+            or bi-temporal table/view. Future rows are defined as those where the
+            start of the valid-time period is greater than the current time.
+
+        PARAMETERS:
+            None.
+
+        RETURNS:
+            teradataml DataFrame.
+
+        RAISES:
+            TeradataMlException.
+
+        EXAMPLES:
+            # Load the data to run the example.
+            >>> load_example_data("teradataml", "Employee_roles")
+
+            # Create a DataFrame on 'Employee_roles' table.
+            >>> df = DataFrame("Employee_roles")
+
+            # Retrieve future rows from the DataFrame.
+            >>> df.future_rows()
+               EmployeeID EmployeeName Department  Salary      role_validity_period
+                        3          Bob  Marketing   330.0  ('29/01/01', '99/12/31')
+        """
+        from teradataml.dataframe.functions import current_date, current_timestamp
+        # Validate the temporal table type.
+        _Validators._validate_temporal_table_type(self.df_type)
+        valid_time_col = self._valid_time_column
+        df = self._generate_temporal_dataframe("NONSEQUENCED VALIDTIME", valid_time_col)
+        # Check the type of the ValidTime dimension column.
+        if isinstance(valid_time_col.type, tdtypes.PERIOD_DATE):
+            # Filter records where the start of the ValidTime period is greater than the current date.
+            return df[valid_time_col.begin() > current_date()]
+        return df[valid_time_col.begin() > current_timestamp()]
+
+    def open_rows(self):
+        """
+        DESCRIPTION:
+            Retrieves open rows from a DataFrame created on a transaction-time
+            or bi-temporal table/view. Open rows are defined as those where the
+            end of the transaction-time period is greater than or equal to the current time.
+
+        PARAMETERS:
+            None.
+
+        RETURNS:
+            teradataml DataFrame.
+
+        RAISES:
+            TeradataMlException.
+
+        EXAMPLES:
+            # Load the data to run the example.
+            >>> load_example_data("teradataml", "Employee_address")
+
+            # Create a DataFrame on 'Employee_address' table.
+            >>> df = DataFrame("Employee_address")
+
+            # Retrieve open rows from the DataFrame.
+            >>> df.open_rows()
+               EmployeeID EmployeeName      address                                                            validity_period
+                        1     John Doe  123 Main St  ('2025-03-04 15:41:44.610000+00:00', '9999-12-31 23:59:59.999999+00:00')
+                        2   Jane Smith   456 Elm St  ('2025-03-04 15:41:44.610000+00:00', '9999-12-31 23:59:59.999999+00:00')
+        """
+        from teradataml.dataframe.functions import current_timestamp
+        # Validate the temporal table type.
+        _Validators._validate_temporal_table_type(self.df_type)
+        transaction_time_col = self._transaction_time_column
+        df = self._generate_temporal_dataframe("NONSEQUENCED TRANSACTIONTIME", transaction_time_col)
+        return df[transaction_time_col.end() >= current_timestamp()]
+
+    def closed_rows(self):
+        """
+        DESCRIPTION:
+            Retrieves closed rows from a DataFrame created on a transaction-time
+            or bi-temporal table/view. Closed rows are defined as those where the
+            end of the transaction-time period is less than the current time.
+
+        PARAMETERS:
+            None.
+
+        RETURNS:
+            teradataml DataFrame.
+
+        RAISES:
+            TeradataMlException.
+
+        EXAMPLES:
+            # Load the data to run the example.
+            >>> load_example_data("teradataml", "Employee_address")
+
+            # Create a DataFrame on 'Employee_address' table.
+            >>> df = DataFrame("Employee_address")
+
+            # Retrieve closed rows from the DataFrame.
+            >>> df.closed_rows()
+               EmployeeID EmployeeName      address                                                            validity_period
+                        1     John Doe  123 Main St  ('2025-03-04 15:41:44.610000+00:00', '2025-04-01 23:59:59.999999+00:00')
+        """
+        from teradataml.dataframe.functions import current_timestamp
+        # Validate the temporal table type.
+        _Validators._validate_temporal_table_type(self.df_type)
+        transaction_time_col = self._transaction_time_column
+        df = self._generate_temporal_dataframe("NONSEQUENCED TRANSACTIONTIME", transaction_time_col)
+        return df[transaction_time_col.end() < current_timestamp()]
+
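Since each of the four row-state helpers is just NONSEQUENCED temporal access plus one comparison on a period bound, their results compose like any other teradataml DataFrame; a minimal usage sketch built on the docstring data above:

    from teradataml import DataFrame, load_example_data

    load_example_data("teradataml", "Employee_roles")
    df = DataFrame("Employee_roles")

    # Partition rows by valid-time state: already-expired vs. not-yet-effective roles.
    past, upcoming = df.historic_rows(), df.future_rows()
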
+    @collect_queryband(queryband="DF_create_view")
+    def create_view(self, view_name, schema_name=None):
+        """
+        Creates a view from the DataFrame object in the specified schema.
+        teradataml internally creates views for its operations, and those views
+        are garbage collected during remove_context(); this function helps the
+        user persist the DataFrame as a view.
+        Note:
+            The persisted view can be used across sessions and can be accessed
+            using the view_name and schema_name.
+
+        PARAMETERS:
+            view_name:
+                Required Argument.
+                Specifies the name of the view to be persisted.
+                Types: str
+
+            schema_name:
+                Optional Argument.
+                Specifies the schema name where the view is to be persisted.
+                Note:
+                    If the schema_name is not provided, the current database is used.
+                Types: str
+
+        RETURNS:
+            Persisted teradataml DataFrame.
+
+        RAISES:
+            TeradataMlException
+
+        EXAMPLES:
+            # Load the data to run the example.
+            >>> load_example_data("antiselect", ["antiselect_input"])
+            >>> antiselect_input = DataFrame.from_table("antiselect_input")
+            >>> antiselect_input
+                    orderid orderdate       priority  quantity      sales  discount        shipmode            custname province   region     custsegment          prodcat
+            rowids
+            49          293  12/10/01           high        49 10123.0200      0.07  delivery truck        barry french  nunavut  nunavut        consumer  office supplies
+            97          613  11/06/17           high        12    93.5400      0.03     regular air        carl jackson  nunavut  nunavut       corporate  office supplies
+            85          515  10/08/28  not specified        19   394.2700      0.08     regular air      carlos soltero  nunavut  nunavut        consumer  office supplies
+            86          515  10/08/28  not specified        21   146.6900      0.05     regular air      carlos soltero  nunavut  nunavut        consumer        furniture
+            1             3  10/10/13            low         6   261.5400      0.04     regular air  muhammed macintyre  nunavut  nunavut  small business  office supplies
+            50          293  12/10/01           high        27   244.5700      0.01     regular air        barry french  nunavut  nunavut        consumer  office supplies
+            80          483  11/07/10           high        30  4965.7595      0.08     regular air       clay rozendal  nunavut  nunavut       corporate       technology
+
+            # Filter the data based on quantity.
+            >>> anti_df = antiselect_input[antiselect_input.quantity < 30]
+            >>> anti_df
+                    orderid orderdate       priority  quantity   sales  discount     shipmode            custname province   region     custsegment          prodcat
+            rowids
+            97          613  11/06/17           high        12   93.54      0.03  regular air        carl jackson  nunavut  nunavut       corporate  office supplies
+            86          515  10/08/28  not specified        21  146.69      0.05  regular air      carlos soltero  nunavut  nunavut        consumer        furniture
+            85          515  10/08/28  not specified        19  394.27      0.08  regular air      carlos soltero  nunavut  nunavut        consumer  office supplies
+            1             3  10/10/13            low         6  261.54      0.04  regular air  muhammed macintyre  nunavut  nunavut  small business  office supplies
+            50          293  12/10/01           high        27  244.57      0.01  regular air        barry french  nunavut  nunavut        consumer  office supplies
+
+            # Run Antiselect on the filtered data. This creates a temporary view which will be garbage collected.
+            >>> obj = Antiselect(data=anti_df, exclude=['rowids', 'orderdate', 'discount', 'province', 'custsegment'])
+
+            # Get the view name that teradataml internally created to store the result of Antiselect.
+            >>> obj.result.db_object_name
+            '"<schema_name>"."ml__td_sqlmr_out__1752582812690000"'
+
+            # Check the output of Antiselect.
+            >>> obj.result
+               orderid       priority  quantity   sales     shipmode            custname   region          prodcat
+            0      613           high        12   93.54  regular air        carl jackson  nunavut  office supplies
+            1      515  not specified        21  146.69  regular air      carlos soltero  nunavut        furniture
+            2      515  not specified        19  394.27  regular air      carlos soltero  nunavut  office supplies
+            3      293           high        27  244.57  regular air        barry french  nunavut  office supplies
+            4        3            low         6  261.54  regular air  muhammed macintyre  nunavut  office supplies
+
+            # Describe the resultant DataFrame.
+            >>> df = obj.result.describe()  # This will create a temporary view.
+
+            # Get the view name.
+            >>> df.db_object_name
+            '"<schema_name>"."ml__td_sqlmr_out__1752585435339977"'
+
+            # Check the output of describe.
+            >>> df
+              ATTRIBUTE            StatName   StatValue
+            0   orderid             MAXIMUM  613.000000
+            1   orderid  STANDARD DEVIATION  245.016734
+            2   orderid     PERCENTILES(25)  293.000000
+            3   orderid     PERCENTILES(50)  515.000000
+            4  quantity               COUNT    5.000000
+            5  quantity             MINIMUM    6.000000
+            6  quantity             MAXIMUM   27.000000
+            7  quantity                MEAN   17.000000
+            8  quantity  STANDARD DEVIATION    8.154753
+            9  quantity     PERCENTILES(25)   12.000000
+
+            # Example 1: Persist the view so it can be accessed across sessions.
+            >>> df_new = df.create_view(view_name="antiselect_describe_view")
+            >>> df_new
+              ATTRIBUTE            StatName   StatValue
+            0  quantity             MAXIMUM   27.000000
+            1  quantity  STANDARD DEVIATION    8.154753
+            2  quantity     PERCENTILES(25)   12.000000
+            3  quantity     PERCENTILES(50)   19.000000
+            4     sales               COUNT    5.000000
+            5     sales             MINIMUM   93.540000
+            6   orderid               COUNT    5.000000
+            7   orderid             MINIMUM    3.000000
+            8   orderid             MAXIMUM  613.000000
+            9   orderid                MEAN  387.800000
+
+            # Get the view name.
+            >>> df_new.db_object_name  # "<schema_name>" is the user's connected database.
+            '"<schema_name>"."antiselect_describe_view"'
+
+        """
+        # Argument validation.
+        arg_info_matrix = []
+        arg_info_matrix.append(["view_name", view_name, False, (str,), True])
+        arg_info_matrix.append(["schema_name", schema_name, True, (str,), True])
+        _Validators._validate_missing_required_arguments(arg_info_matrix)
+        _Validators._validate_function_arguments(arg_info_matrix)
+
+        # TODO: Investigate and identify the issue when volatile tables replace views in future.
+
+        visited = set()
+        to_persist = []
+        is_teradataml_temp_table = lambda x: x.startswith("ml__") or x.startswith("tdml_")
+        sql_bundle = SQLBundle()
+
+        def trace_views(table_name):
+            if table_name in visited:
+                return
+            visited.add(table_name)
+            base_name = UtilFuncs._extract_table_name(full_qualified_name=table_name)
+            if is_teradataml_temp_table(base_name):
+                to_persist.append(table_name)
+            # Try to get the SQL for the view.
+            show_view_sql = sql_bundle._get_sql_query(SQLConstants.SQL_SHOW_VIEW).format(table_name)
+            try:
+                result = execute_sql(show_view_sql).fetchall()
+                if result:
+                    view_sql = result[0][0].replace("\r", "").replace("\n", " ")\
+                        .replace("\t", " ").strip()
+
+                    # Extract all table names from the view SQL.
+                    for tname in UtilFuncs.extract_table_names_from_query(view_sql):
+                        trace_views(tname)
+            except Exception as e:
+                # If the error says 'not a view', try SHOW TABLE instead.
+                err_msg = str(e).lower()
+                if 'not a view' in err_msg:
+                    show_table_sql = sql_bundle._get_sql_query(SQLConstants.SQL_SHOW_TABLE).format(table_name)
+                    try:
+                        result = execute_sql(show_table_sql).fetchall()
+                        if result:
+                            # Table found, nothing to trace further.
+                            # This table is persisted.
+                            return
+                    except Exception as e2:
+                        # If SHOW TABLE also fails, raise the exception.
+                        raise e2
+                else:
+                    # If the error is not about 'not a view', re-raise.
+                    raise e
+
+        # 1. Get the query for this DataFrame.
+        query = self.show_query()
+        # 2. Extract all table names from the query.
+        for tname in UtilFuncs.extract_table_names_from_query(query):
+            trace_views(tname)
+
+        # 3. Persist the current DataFrame as a permanent object.
+        #    This issues CREATE VIEW <target_name> AS SELECT ...
+        #    Use object_name and schema_name as needed.
+        from teradataml.dbutils.dbutils import _get_quoted_object_name
+        target_name = _get_quoted_object_name(schema_name=schema_name, object_name=view_name)
+
+        create_sql = sql_bundle._build_create_view(view_name=target_name,
+                                                   select_expression=query)
+
+        # No try-except here, as we want to raise any error that occurs during execution.
+        execute_sql(create_sql)
+
+        # TODO: Add a logger message that these views/tables were persisted.
+        # if to_persist:
+        #     logger.info("to_persist: ", to_persist)
+
+        # Remove the tables/views from the GC file, as we need to persist them. Remove them
+        # only after the required object is created.
+        GarbageCollector._delete_object_entry(objects_to_delete=to_persist,
+                                              object_type=None,
+                                              remove_entry_from_gc_list=True)
+
+        # Return the teradataml DataFrame for the persisted object.
+        if schema_name is None:
+            schema_name = tdmlctx._get_current_databasename()
+        return DataFrame(in_schema(schema_name=schema_name, table_name=view_name))
+
+
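The recursive dependency walk in trace_views() boils down to a depth-first traversal over "SHOW VIEW" output; a condensed standalone sketch, where get_view_sql, extract_names, and is_temp are hypothetical callables (they do not exist under these names in teradataml) standing in for SQL execution and table-name extraction:

    def trace(name, get_view_sql, extract_names, is_temp, visited=None, to_persist=None):
        # Depth-first walk over the view-dependency graph, collecting temporary
        # teradataml objects whose garbage-collection entries must be removed.
        visited = set() if visited is None else visited
        to_persist = [] if to_persist is None else to_persist
        if name in visited:
            return to_persist
        visited.add(name)
        if is_temp(name):
            to_persist.append(name)
        sql = get_view_sql(name)  # assumed to return None when "name" is a base table
        if sql is not None:
            for dep in extract_names(sql):
                trace(dep, get_view_sql, extract_names, is_temp, visited, to_persist)
        return to_persist
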
 class DataFrameGroupBy(DataFrame):
     """
     This class integrates the GroupBy clause with AED.
@@ -16382,8 +17501,8 @@ class DataFrameGroupBy(DataFrame):
             include_grouping_columns:
                 Optional Argument.
                 Specifies whether to include aggregations on the grouping column(s) or not.
-                When set to True, the resultant DataFrame will have the aggregations on the
-                columns mentioned in "columns". Otherwise, resultant DataFrame will not have
+                When set to True, the resultant DataFrame will have the aggregations on the
+                columns mentioned in "columns". Otherwise, the resultant DataFrame will not have
                 aggregations on the columns mentioned in "columns".
                 Default Value: False
                 Types: bool
@@ -16483,7 +17602,8 @@ class DataFrameGroupBy(DataFrame):
 
         new_meta = UtilFuncs._get_metaexpr_using_columns(new_nodeid,
                                                          zip(new_column_names,
-                                                             new_column_types))
+                                                             new_column_types),
+                                                         datalake=self._metaexpr.datalake)
 
         return (new_meta, new_nodeid)