teradataml 20.0.0.6__py3-none-any.whl → 20.0.0.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of teradataml might be problematic.
Files changed (96)
  1. teradataml/README.md +210 -0
  2. teradataml/__init__.py +1 -1
  3. teradataml/_version.py +1 -1
  4. teradataml/analytics/analytic_function_executor.py +162 -76
  5. teradataml/analytics/byom/__init__.py +1 -1
  6. teradataml/analytics/json_parser/__init__.py +2 -0
  7. teradataml/analytics/json_parser/analytic_functions_argument.py +95 -2
  8. teradataml/analytics/json_parser/metadata.py +22 -4
  9. teradataml/analytics/sqle/DecisionTreePredict.py +3 -2
  10. teradataml/analytics/sqle/NaiveBayesPredict.py +3 -2
  11. teradataml/analytics/sqle/__init__.py +3 -0
  12. teradataml/analytics/utils.py +4 -1
  13. teradataml/automl/__init__.py +2369 -464
  14. teradataml/automl/autodataprep/__init__.py +15 -0
  15. teradataml/automl/custom_json_utils.py +184 -112
  16. teradataml/automl/data_preparation.py +113 -58
  17. teradataml/automl/data_transformation.py +154 -53
  18. teradataml/automl/feature_engineering.py +113 -53
  19. teradataml/automl/feature_exploration.py +548 -25
  20. teradataml/automl/model_evaluation.py +260 -32
  21. teradataml/automl/model_training.py +399 -206
  22. teradataml/clients/auth_client.py +2 -2
  23. teradataml/common/aed_utils.py +11 -2
  24. teradataml/common/bulk_exposed_utils.py +4 -2
  25. teradataml/common/constants.py +62 -2
  26. teradataml/common/garbagecollector.py +50 -21
  27. teradataml/common/messagecodes.py +47 -2
  28. teradataml/common/messages.py +19 -1
  29. teradataml/common/sqlbundle.py +23 -6
  30. teradataml/common/utils.py +116 -10
  31. teradataml/context/aed_context.py +16 -10
  32. teradataml/data/Employee.csv +5 -0
  33. teradataml/data/Employee_Address.csv +4 -0
  34. teradataml/data/Employee_roles.csv +5 -0
  35. teradataml/data/JulesBelvezeDummyData.csv +100 -0
  36. teradataml/data/byom_example.json +5 -0
  37. teradataml/data/creditcard_data.csv +284618 -0
  38. teradataml/data/docs/byom/docs/ONNXSeq2Seq.py +255 -0
  39. teradataml/data/docs/sqle/docs_17_10/NGramSplitter.py +1 -1
  40. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +1 -1
  41. teradataml/data/docs/sqle/docs_17_20/TextParser.py +1 -1
  42. teradataml/data/jsons/byom/ONNXSeq2Seq.json +287 -0
  43. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +3 -7
  44. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +3 -7
  45. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +3 -7
  46. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +3 -7
  47. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +3 -7
  48. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +3 -7
  49. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +3 -7
  50. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +3 -7
  51. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +3 -7
  52. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +3 -7
  53. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +3 -7
  54. teradataml/data/jsons/sqle/20.00/TD_API_AzureML.json +151 -0
  55. teradataml/data/jsons/sqle/20.00/TD_API_Sagemaker.json +182 -0
  56. teradataml/data/jsons/sqle/20.00/TD_API_VertexAI.json +183 -0
  57. teradataml/data/load_example_data.py +29 -11
  58. teradataml/data/payment_fraud_dataset.csv +10001 -0
  59. teradataml/data/teradataml_example.json +67 -0
  60. teradataml/dataframe/copy_to.py +714 -54
  61. teradataml/dataframe/dataframe.py +1153 -33
  62. teradataml/dataframe/dataframe_utils.py +8 -3
  63. teradataml/dataframe/functions.py +168 -1
  64. teradataml/dataframe/setop.py +4 -1
  65. teradataml/dataframe/sql.py +141 -9
  66. teradataml/dbutils/dbutils.py +470 -35
  67. teradataml/dbutils/filemgr.py +1 -1
  68. teradataml/hyperparameter_tuner/optimizer.py +456 -142
  69. teradataml/lib/aed_0_1.dll +0 -0
  70. teradataml/lib/libaed_0_1.dylib +0 -0
  71. teradataml/lib/libaed_0_1.so +0 -0
  72. teradataml/lib/libaed_0_1_aarch64.so +0 -0
  73. teradataml/scriptmgmt/UserEnv.py +234 -34
  74. teradataml/scriptmgmt/lls_utils.py +43 -17
  75. teradataml/sdk/_json_parser.py +1 -1
  76. teradataml/sdk/api_client.py +9 -6
  77. teradataml/sdk/modelops/_client.py +3 -0
  78. teradataml/series/series.py +12 -7
  79. teradataml/store/feature_store/constants.py +601 -234
  80. teradataml/store/feature_store/feature_store.py +2886 -616
  81. teradataml/store/feature_store/mind_map.py +639 -0
  82. teradataml/store/feature_store/models.py +5831 -214
  83. teradataml/store/feature_store/utils.py +390 -0
  84. teradataml/table_operators/table_operator_util.py +1 -1
  85. teradataml/table_operators/templates/dataframe_register.template +6 -2
  86. teradataml/table_operators/templates/dataframe_udf.template +6 -2
  87. teradataml/utils/docstring.py +527 -0
  88. teradataml/utils/dtypes.py +93 -0
  89. teradataml/utils/internal_buffer.py +2 -2
  90. teradataml/utils/utils.py +41 -2
  91. teradataml/utils/validators.py +694 -17
  92. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/METADATA +213 -2
  93. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/RECORD +96 -81
  94. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/WHEEL +0 -0
  95. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/top_level.txt +0 -0
  96. {teradataml-20.0.0.6.dist-info → teradataml-20.0.0.7.dist-info}/zip-safe +0 -0
@@ -14,9 +14,11 @@ import pandas.api.types as pt
 
 from sqlalchemy import MetaData, Table, Column
 from sqlalchemy.exc import OperationalError as sqlachemyOperationalError
+from teradataml.dataframe.sql import ColumnExpression
 from teradatasqlalchemy import (INTEGER, BIGINT, BYTEINT, FLOAT)
-from teradatasqlalchemy import (TIMESTAMP)
+from teradatasqlalchemy import (TIMESTAMP, DATE)
 from teradatasqlalchemy import (VARCHAR)
+from teradatasqlalchemy import (PERIOD_DATE, PERIOD_TIMESTAMP)
 from teradatasqlalchemy.dialect import TDCreateTablePost as post
 from teradataml.common.aed_utils import AedUtils
 from teradataml.context.context import *
@@ -25,13 +27,15 @@ from teradataml.dataframe.dataframe_utils import DataFrameUtils as df_utils
 from teradataml.dbutils.dbutils import _rename_table
 from teradataml.common.utils import UtilFuncs
 from teradataml.options.configure import configure
-from teradataml.common.constants import CopyToConstants, PTITableConstants
+from teradataml.common.constants import CopyToConstants, PTITableConstants, TeradataTypes
 from teradatasql import OperationalError
 from teradataml.common.wrapper_utils import AnalyticsWrapperUtils
 from teradataml.utils.utils import execute_sql
 from teradataml.utils.validators import _Validators
 from teradataml.telemetry_utils.queryband import collect_queryband
+from teradatasqlalchemy.dialect import dialect as td_dialect
 
+from teradataml.utils.dtypes import _TupleOf
 
 @collect_queryband(queryband="CpToSql")
 def copy_to_sql(df, table_name,
@@ -48,7 +52,12 @@ def copy_to_sql(df, table_name,
                 seq_max=None,
                 set_table=False,
                 chunksize=CopyToConstants.DBAPI_BATCHSIZE.value,
-                match_column_order=True):
+                match_column_order=True,
+                partition_by=None,
+                partition_by_case=None,
+                partition_by_range=None,
+                sub_partition=None,
+                **kwargs):
     """
     Writes records stored in a Pandas DataFrame or a teradataml DataFrame to Teradata Vantage.
 
@@ -284,6 +293,68 @@ def copy_to_sql(df, table_name,
             Default Value: True
             Types: bool
 
+        partition_by:
+            Optional Argument.
+            Specifies the column(s) on which the partition should be created while creating the table.
+            Note:
+                1. "partition_by", "partition_by_case" and "partition_by_range" are mutually exclusive.
+                2. "primary_index" should be specified when "partition_by" is used.
+                3. Not applicable for PTI tables.
+            Types: str or ColumnExpression
+
+        partition_by_case:
+            Optional Argument.
+            Specifies the cases on which to partition the index while creating the table.
+            Note:
+                1. "partition_by", "partition_by_case" and "partition_by_range" are mutually exclusive.
+                2. "primary_index" should be specified when "partition_by_case" is used.
+                3. Not applicable for PTI tables.
+            Types: str or ColumnExpression or tuple of str, ColumnExpression
+
+        partition_by_range:
+            Optional Argument.
+            Specifies the range of values on which the partition should be created while creating the table.
+            Note:
+                1. "partition_by", "partition_by_case" and "partition_by_range" are mutually exclusive.
+                2. "primary_index" should be specified when "partition_by_range" is used.
+                3. Not applicable for PTI tables.
+            Types: str or ColumnExpression
+
+        sub_partition:
+            Optional Argument.
+            Specifies the details to subpartition the main partition according to the value
+            provided while creating the table.
+            Note:
+                1. "sub_partition" is applicable only when "partition_by_range" is specified.
+                2. Not applicable for PTI tables.
+            Types: int or Teradata Interval datatypes
+
+        **kwargs:
+            Optional keyword arguments.
+
+            valid_time_columns:
+                Optional Argument.
+                Specifies the name(s) of the valid time columns in "df".
+                When "valid_time_columns" is specified, the function treats these
+                columns as valid time dimension columns and creates a valid time
+                dimension temporal table if the table does not exist.
+                Notes:
+                    * If a string is provided, the column must be of PERIOD type.
+                Types: tuple of strings or str
+
+            derived_column:
+                Optional Argument.
+                Specifies the name of the derived column to be kept in the temporal table.
+                Notes:
+                    * Argument is ignored if "valid_time_columns" is not specified.
+                    * Argument is considered only if copy_to_sql() is creating a table.
+                    * If "valid_time_columns" is specified and "derived_column" is not,
+                      then copy_to_sql() automatically creates a derived column by joining
+                      the columns mentioned in "valid_time_columns" with "_". For example,
+                      if "valid_time_columns" is ('col1', 'col2') and "derived_column" is
+                      not specified, then copy_to_sql() creates the table with the derived
+                      column named 'col1_col2'.
+                Types: str
+
     RETURNS:
         None
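
For orientation, these arguments map onto Teradata's PARTITION BY clause. Based on
_build_partition_expression later in this diff, a call such as the following should
produce DDL along these lines (a sketch, assuming the post-CREATE table options render
the compiled expression verbatim; table and column names are illustrative):

    >>> copy_to_sql(df=pandas_df, table_name='my_table_6', if_exists='replace',
    ...             primary_index=['emp_id'],
    ...             partition_by_case='emp_id > 100, emp_id < 500')
    >>> # Expected DDL fragment:
    >>> # PRIMARY INDEX (emp_id)
    >>> # PARTITION BY CASE_N(emp_id > 100, emp_id < 500, NO CASE, UNKNOWN)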
 
@@ -305,32 +376,32 @@ def copy_to_sql(df, table_name,
         >>> pandas_df = pd.DataFrame(df)
 
         a) Save a Pandas DataFrame using a dataframe & table name only:
-        >>> copy_to_sql(df = pandas_df, table_name = 'my_table')
+        >>> copy_to_sql(df=pandas_df, table_name='my_table')
 
         b) Saving as a SET table
-        >>> copy_to_sql(df = pandas_df, table_name = 'my_set_table', index=True,
+        >>> copy_to_sql(df=pandas_df, table_name='my_set_table', index=True,
         ...             primary_index='index_label', set_table=True)
 
         c) Save a Pandas DataFrame by specifying additional parameters:
-        >>> copy_to_sql(df = pandas_df, table_name = 'my_table_2', schema_name = 'alice',
-        ...             index = True, index_label = 'my_index_label', temporary = False,
-        ...             primary_index = ['emp_id'], if_exists = 'append',
-        ...             types = {'emp_name': VARCHAR, 'emp_sage':INTEGER,
-        ...                      'emp_id': BIGINT, 'marks': DECIMAL})
+        >>> copy_to_sql(df=pandas_df, table_name='my_table_2', schema_name='alice',
+        ...             index=True, index_label='my_index_label', temporary=False,
+        ...             primary_index=['emp_id'], if_exists='append',
+        ...             types={'emp_name': VARCHAR, 'emp_sage': INTEGER,
+        ...                    'emp_id': BIGINT, 'marks': DECIMAL})
 
         d) Saving with additional parameters as a SET table
-        >>> copy_to_sql(df = pandas_df, table_name = 'my_table_3', schema_name = 'alice',
-        ...             index = True, index_label = 'my_index_label', temporary = False,
-        ...             primary_index = ['emp_id'], if_exists = 'append',
-        ...             types = {'emp_name': VARCHAR, 'emp_sage':INTEGER,
-        ...                      'emp_id': BIGINT, 'marks': DECIMAL},
+        >>> copy_to_sql(df=pandas_df, table_name='my_table_3', schema_name='alice',
+        ...             index=True, index_label='my_index_label', temporary=False,
+        ...             primary_index=['emp_id'], if_exists='append',
+        ...             types={'emp_name': VARCHAR, 'emp_sage': INTEGER,
+        ...                    'emp_id': BIGINT, 'marks': DECIMAL},
         ...             set_table=True)
 
         e) Saving levels in index of type MultiIndex
         >>> pandas_df = pandas_df.set_index(['emp_id', 'emp_name'])
-        >>> copy_to_sql(df = pandas_df, table_name = 'my_table_4', schema_name = 'alice',
-        ...             index = True, index_label = ['index1', 'index2'], temporary = False,
-        ...             primary_index = ['index1'], if_exists = 'replace')
+        >>> copy_to_sql(df=pandas_df, table_name='my_table_4', schema_name='alice',
+        ...             index=True, index_label=['index1', 'index2'], temporary=False,
+        ...             primary_index=['index1'], if_exists='replace')
 
         f) Save a Pandas DataFrame with VECTOR datatype:
         >>> import pandas as pd
@@ -343,6 +414,68 @@ def copy_to_sql(df, table_name,
         >>> from teradatasqlalchemy import VECTOR
         >>> copy_to_sql(df=df, table_name='my_vector_table', types={'array_col': VECTOR})
 
+        g) Saving a Pandas DataFrame with partition_by:
+        >>> copy_to_sql(df=pandas_df, table_name='my_table_5', if_exists='replace',
+        ...             primary_index=['emp_id'],
+        ...             partition_by='emp_id')
+
+        h) Saving a Pandas DataFrame with partition_by_case:
+        >>> copy_to_sql(df=pandas_df, table_name='my_table_6', if_exists='replace',
+        ...             primary_index=['emp_id'],
+        ...             partition_by_case='emp_id > 100, emp_id < 500')
+
+        i) Saving a Pandas DataFrame with partition_by_range:
+        >>> copy_to_sql(df=pandas_df, table_name='my_table_7', if_exists='replace',
+        ...             primary_index=['emp_id'],
+        ...             partition_by_range='emp_id BETWEEN 100 AND 500')
+
+        j) Save a Pandas DataFrame with valid time columns of DATE type to a temporal table.
+        >>> import pandas as pd
+        >>> from teradataml.dataframe.copy_to import copy_to_sql
+        >>> df = pd.DataFrame({
+        ...     'id': [1, 2, 3],
+        ...     'start_date': pd.to_datetime(['2024-01-01', '2024-02-01', '2024-03-01']).date,
+        ...     'end_date': pd.to_datetime(['2024-01-10', '2024-02-10', '2024-03-10']).date,
+        ...     'description': ['a', 'b', 'c']
+        ... })
+        >>> copy_to_sql(
+        ...     df=df,
+        ...     table_name='temporal_table_pandas_date',
+        ...     valid_time_columns=('start_date', 'end_date')
+        ... )
+
+        k) Save a Pandas DataFrame with valid time columns of TIMESTAMP type
+           to a temporal table. Name the derived column 'valid_time'.
+        >>> import pandas as pd
+        >>> from teradataml.dataframe.copy_to import copy_to_sql
+        >>> df = pd.DataFrame({
+        ...     'id': [1, 2, 3],
+        ...     'start_time': pd.to_datetime(['2024-01-01 10:00:00', '2024-02-01 11:00:00', '2024-03-01 12:00:00']),
+        ...     'end_time': pd.to_datetime(['2024-01-01 12:00:00', '2024-02-01 13:00:00', '2024-03-01 14:00:00']),
+        ...     'description': ['a', 'b', 'c']
+        ... })
+        >>> copy_to_sql(
+        ...     df=df,
+        ...     table_name='temporal_table_pandas_timestamp',
+        ...     valid_time_columns=('start_time', 'end_time'),
+        ...     derived_column='valid_time'
+        ... )
+
+        l) Save a teradataml DataFrame with a valid time column of PERIOD type to a temporal table.
+        >>> from teradataml.dataframe.dataframe import DataFrame
+        >>> from teradataml.dataframe.copy_to import copy_to_sql
+        >>> from teradataml.data.load_example_data import load_example_data
+        >>> load_example_data("teradataml", "Employee_roles")
+        >>> from teradatasqlalchemy.types import PERIOD_DATE
+        >>> df = DataFrame('Employee_roles')
+        >>> copy_to_sql(
+        ...     df,
+        ...     table_name='employee_roles_temporal',
+        ...     valid_time_columns='role_validity_period',
+        ...     types={'role_validity_period': PERIOD_DATE}
+        ... )
+
     2. Saving a teradataml DataFrame:
 
         >>> from teradataml.dataframe.dataframe import DataFrame
@@ -368,14 +501,62 @@ def copy_to_sql(df, table_name,
         >>> copy_to_sql(df2, 'my_tdml_table_2')
 
         d) Save a teradataml DataFrame by using copy_to_sql with additional parameters:
-        >>> copy_to_sql(df = df2, table_name = 'my_tdml_table_3', schema_name = 'alice',
-        ...             temporary = False, primary_index = None, if_exists = 'append',
-        ...             types = {'masters': VARCHAR, 'gpa':INTEGER})
+        >>> copy_to_sql(df=df2, table_name='my_tdml_table_3', schema_name='alice',
+        ...             temporary=False, primary_index=None, if_exists='append',
+        ...             types={'masters': VARCHAR, 'gpa': INTEGER})
 
         e) Saving as a SET table
-        >>> copy_to_sql(df = df2, table_name = 'my_tdml_set_table', schema_name = 'alice',
-        ...             temporary = False, primary_index = ['gpa'], if_exists = 'append',
-        ...             types = {'masters': VARCHAR, 'gpa':INTEGER}, set_table = True)
+        >>> copy_to_sql(df=df2, table_name='my_tdml_set_table', schema_name='alice',
+        ...             temporary=False, primary_index=['gpa'], if_exists='append',
+        ...             types={'masters': VARCHAR, 'gpa': INTEGER}, set_table=True)
+
+        f) Saving a teradataml DataFrame into a table partitioned by column 'gpa':
+        >>> copy_to_sql(df=df, table_name='my_tdml_table_4', if_exists='replace',
+        ...             primary_index=['gpa'],
+        ...             partition_by=df.gpa)
+
+        g) Saving a teradataml DataFrame into a table with two partitions:
+        >>> copy_to_sql(df=df, table_name='my_tdml_table_5', if_exists='replace',
+        ...             primary_index=['id'],
+        ...             partition_by_case=(df.id < 100, df.gpa < 5.0))
+
+        h) Saving a teradataml DataFrame into a table partitioned by a range of values:
+        >>> copy_to_sql(df=df, table_name='my_tdml_table_6', if_exists='replace',
+        ...             primary_index=['id'],
+        ...             partition_by_range=df.id.between(1, 100))
+
+        i) Saving a teradataml DataFrame into a table partitioned by a range of values,
+           sub-partitioned on an INTERVAL:
+        >>> load_example_data("dataframe", "sales")
+        >>> df = DataFrame('sales')
+        >>> from teradatasqlalchemy import INTERVAL_DAY
+        >>> copy_to_sql(df=df, table_name='my_tdml_table_7', if_exists='replace',
+        ...             primary_index="Feb",
+        ...             partition_by_range=df.datetime.between('2017-01-01', '2017-01-31'),
+        ...             sub_partition=INTERVAL_DAY(1))
+
+        j) Save a teradataml DataFrame with valid time columns of DATE type to a temporal table.
+        >>> pdf = pd.DataFrame({
+        ...     'id': [1, 2, 3],
+        ...     'start_date': pd.to_datetime(['2024-01-01', '2024-02-01', '2024-03-01']).date,
+        ...     'end_date': pd.to_datetime(['2024-01-10', '2024-02-10', '2024-03-10']).date,
+        ...     'description': ['a', 'b', 'c']
+        ... })
+        >>> df_temporal = DataFrame(data=pdf)
+        >>> copy_to_sql(df=df_temporal, table_name='temporal_table_tdml_date',
+        ...             valid_time_columns=('start_date', 'end_date'))
+
+        k) Save a teradataml DataFrame with valid time columns of TIMESTAMP type
+           to a temporal table. Name the derived column 'validity_period'.
+        >>> df_temporal_ts = DataFrame(data=pd.DataFrame({
+        ...     'id': [1, 2, 3],
+        ...     'start_time': pd.to_datetime(['2024-01-01 10:00:00', '2024-02-01 11:00:00', '2024-03-01 12:00:00']),
+        ...     'end_time': pd.to_datetime(['2024-01-01 12:00:00', '2024-02-01 13:00:00', '2024-03-01 14:00:00']),
+        ...     'description': ['a', 'b', 'c']
+        ... }))
+        >>> copy_to_sql(df=df_temporal_ts, table_name='temporal_table_tdml_timestamp',
+        ...             valid_time_columns=('start_time', 'end_time'), derived_column='validity_period')
+
 
     3. Saving a teradataml DataFrame as a PTI table:
 
@@ -403,6 +584,10 @@ def copy_to_sql(df, table_name,
         ...             set_table=True)
 
     """
+    # Accept valid_time_columns and derived_column from kwargs.
+    valid_time_columns = kwargs.get("valid_time_columns", None)
+    derived_column = kwargs.get("derived_column", None)
+
     # Deriving global connection using get_connection().
     con = get_connection()
 
@@ -460,6 +645,12 @@ def copy_to_sql(df, table_name,
 
     dt_obj._validate()
 
+    # Validate partition arguments.
+    _validate_partition_arguments(partition_by=partition_by,
+                                  partition_by_case=partition_by_case,
+                                  partition_by_range=partition_by_range,
+                                  sub_partition=sub_partition)
+
     # If the table created must be a PTI table, then validate additional parameters
     # Note that if the required parameters for PTI are valid, then other parameters, though being validated,
     # will be ignored - for example, primary_index
@@ -473,6 +664,13 @@ def copy_to_sql(df, table_name,
             raise TeradataMlException(Messages.get_message(MessageCodes.SET_TABLE_NO_PI),
                                       MessageCodes.SET_TABLE_NO_PI)
 
+    # Check whether valid time columns are passed, to decide whether to create a temporal table.
+    is_temporal = False
+    if valid_time_columns is not None:
+        _validate_valid_time_columns(df, valid_time_columns, derived_column, types)
+        is_temporal = True
+
     # Check if destination table exists
     table_exists = dt_obj._table_exists(con)
 
@@ -503,35 +701,49 @@ def copy_to_sql(df, table_name,
     # failing with Blank name in quotation mark. Hence, extracted only the table name.
     table_name = UtilFuncs._extract_table_name(table_name)
 
+    partition_exp, partition_func = _build_partition_expression(partition_by=partition_by,
+                                                                partition_by_case=partition_by_case,
+                                                                partition_by_range=partition_by_range,
+                                                                sub_partition=sub_partition)
+
     # Let's create the SQLAlchemy table object to recreate the table
     if not table_exists or if_exists.lower() == 'replace':
-        if not is_pti:
-            table = _create_table_object(df, table_name, con, primary_index, temporary, schema_name, set_table,
-                                         types, None if not is_pandas_df else index,
-                                         None if not is_pandas_df else index_label)
-        else:
-            table = _create_pti_table_object(df, con, table_name, schema_name, temporary,
-                                             primary_time_index_name, timecode_column, timezero_date,
-                                             timebucket_duration, sequence_column, seq_max,
-                                             columns_list, set_table, types,
-                                             None if not is_pandas_df else index,
-                                             None if not is_pandas_df else index_label)
-
-        if table is not None:
-            # If the table need to be replaced and there is no table name conflict,
-            # let's drop the existing table first
-            if table_exists and not is_conflict:
-                tbl_name = dt_obj._get_fully_qualified_table_name()
-                UtilFuncs._drop_table(tbl_name)
-            try:
-                table.create(bind=get_context())
-            except sqlachemyOperationalError as err:
-                raise TeradataMlException(Messages.get_message(MessageCodes.TABLE_OBJECT_CREATION_FAILED) +
-                                          '\n' + str(err),
-                                          MessageCodes.TABLE_OBJECT_CREATION_FAILED)
+        if is_temporal:
+            _create_temporal_table(df, table_name, con, primary_index,
+                                   schema_name, valid_time_columns, derived_column,
+                                   types, None if not is_pandas_df else index,
+                                   None if not is_pandas_df else index_label)
         else:
-            raise TeradataMlException(Messages.get_message(MessageCodes.TABLE_OBJECT_CREATION_FAILED),
-                                      MessageCodes.TABLE_OBJECT_CREATION_FAILED)
+            if is_pti:
+                table = _create_pti_table_object(df, con, table_name, schema_name, temporary,
+                                                 primary_time_index_name, timecode_column, timezero_date,
+                                                 timebucket_duration, sequence_column, seq_max,
+                                                 columns_list, set_table, types,
+                                                 None if not is_pandas_df else index,
+                                                 None if not is_pandas_df else index_label)
+            else:
+                table = _create_table_object(df, table_name, con, primary_index, temporary, schema_name, set_table,
+                                             types, None if not is_pandas_df else index,
+                                             None if not is_pandas_df else index_label,
+                                             partition_expression=partition_exp,
+                                             partition_function=partition_func)
+
+            if table is not None:
+                # If the table needs to be replaced and there is no table name conflict,
+                # let's drop the existing table first.
+                if table_exists and not is_conflict:
+                    tbl_name = dt_obj._get_fully_qualified_table_name()
+                    UtilFuncs._drop_table(tbl_name)
+                try:
+                    table.create(bind=get_context())
+                except sqlachemyOperationalError as err:
+                    raise TeradataMlException(Messages.get_message(MessageCodes.TABLE_OBJECT_CREATION_FAILED) +
+                                              '\n' + str(err),
+                                              MessageCodes.TABLE_OBJECT_CREATION_FAILED)
+            else:
+                raise TeradataMlException(Messages.get_message(MessageCodes.TABLE_OBJECT_CREATION_FAILED),
+                                          MessageCodes.TABLE_OBJECT_CREATION_FAILED)
 
     # Check column compatibility for insertion when table exists and if_exists = 'append'
     if table_exists and if_exists.lower() == 'append':
@@ -549,7 +761,7 @@ def copy_to_sql(df, table_name,
         cols, _ = df_utils._get_column_names_and_types_from_metaexpr(df._metaexpr)
         if match_column_order:
             cols_compatible = _check_columns_insertion_compatible(table.c, cols, is_pandas_df,
-                                                                  is_pti, timecode_column, sequence_column)
+                                                                  is_pti, timecode_column, sequence_column, derived_column)
 
             if not cols_compatible:
                 raise TeradataMlException(Messages.get_message(MessageCodes.INSERTION_INCOMPATIBLE),
@@ -746,6 +958,143 @@ def _get_index_labels(df, index_label):
 
     return ind_names, ind_types
 
+def _validate_partition_arguments(partition_by=None,
+                                  partition_by_case=None,
+                                  partition_by_range=None,
+                                  sub_partition=None):
+    """
+    Internal function to validate the partition arguments.
+
+    PARAMETERS:
+        partition_by:
+            Optional argument.
+            Specifies the column(s) on which PARTITION BY should be created.
+            Types: str or ColumnExpression
+
+        partition_by_case:
+            Optional argument.
+            Specifies the cases on which to partition the index.
+            Types: str or ColumnExpression or tuple of str, ColumnExpression
+
+        partition_by_range:
+            Optional argument.
+            Specifies the range of values on which the partition is to be created.
+            Types: str or ColumnExpression
+
+        sub_partition:
+            Optional argument.
+            Specifies the details to subpartition the main partition according to the value provided.
+            Types: int or Teradata Interval datatypes
+
+    RETURNS:
+        None
+
+    RAISES:
+        TeradataMlException
+
+    EXAMPLES:
+        >>> _validate_partition_arguments(partition_by='col1')
+        >>> _validate_partition_arguments(partition_by_case=(df.col1 < 100, df.col1 < 1000))
+    """
+    # Validate argument types.
+    arg_matrix = []
+    arg_matrix.append(['partition_by', partition_by, True, (str, ColumnExpression), True])
+    arg_matrix.append(['partition_by_case', partition_by_case, True,
+                       (ColumnExpression, str, _TupleOf((str, ColumnExpression))), True])
+    arg_matrix.append(['partition_by_range', partition_by_range, True, (ColumnExpression, str), True])
+    arg_matrix.append(['sub_partition', sub_partition, True,
+                       (int, TeradataTypes.TD_RANGE_N_CLAUSE_TYPES.value), True])
+    _Validators._validate_function_arguments(arg_matrix)
+
+    # Validate mutually exclusive arguments.
+    _Validators._validate_mutually_exclusive_argument_groups({"partition_by": partition_by},
+                                                             {"partition_by_case": partition_by_case},
+                                                             {"partition_by_range": partition_by_range})
+
+def _build_partition_expression(partition_by=None,
+                                partition_by_case=None,
+                                partition_by_range=None,
+                                sub_partition=None):
+    """
+    DESCRIPTION:
+        Internal function to build the partitioning expression for the table.
+
+    PARAMETERS:
+        partition_by:
+            Optional argument.
+            Specifies the column(s) on which PARTITION BY should be created.
+            Types: str or ColumnExpression
+
+        partition_by_case:
+            Optional argument.
+            Specifies the cases on which to partition the index.
+            Types: str or ColumnExpression or tuple of str, ColumnExpression
+
+        partition_by_range:
+            Optional argument.
+            Specifies the range of values on which the partition is to be created.
+            Types: str or ColumnExpression
+
+        sub_partition:
+            Optional argument.
+            Specifies the details to subpartition the main partition according to the value provided.
+            Types: int or Teradata Interval datatypes
+
+    RAISES:
+        None
+
+    RETURNS:
+        A tuple of two strings: the partitioning expression and the partition
+        function name ("CASE_N" or "RANGE_N"); either element can be None.
+
+    EXAMPLES:
+        >>> _build_partition_expression(partition_by='col1')
+        >>> _build_partition_expression(partition_by_case=(df.col1 < 100, df.col1 < 1000))
+    """
+    partition_exp = None
+    partition_fn = None
+
+    # If the partition_by expression is a ColumnExpression, compile it to a string.
+    if partition_by:
+        partition_exp = partition_by.compile() if isinstance(partition_by, ColumnExpression) \
+            else partition_by
+
+    # partition_by_case can be a string, a ColumnExpression, or a tuple of either.
+    # Compile every ColumnExpression to a string and join the cases into a CASE_N expression.
+    if partition_by_case:
+        partition_fn = "CASE_N"
+        partition_by_case = [partition_by_case] if isinstance(partition_by_case, (str, ColumnExpression)) \
+            else partition_by_case
+        partition_exp = "{}, NO CASE, UNKNOWN".format(
+            ", ".join(str(exp.compile()) if isinstance(exp, ColumnExpression) else str(exp)
+                      for exp in partition_by_case))
+
+    # If partition_by_range is a ColumnExpression, compile it to a string.
+    if partition_by_range:
+        partition_fn = "RANGE_N"
+        sub_partition_clause = ""
+        if isinstance(partition_by_range, ColumnExpression):
+            partition_by_range = partition_by_range.compile()
+
+        # If sub_partition is provided, compile the EACH clause for RANGE_N.
+        # If sub_partition is an int, convert it to a string and add it to the clause.
+        # If sub_partition is one of TeradataTypes.TD_RANGE_N_CLAUSE_TYPES,
+        # extract the precision and the interval unit and add them to the clause.
+        if sub_partition:
+            sub_partition_clause = (
+                f" EACH {str(sub_partition)}"
+                if isinstance(sub_partition, int)
+                else f" EACH INTERVAL '{sub_partition.precision}' {str(sub_partition).split(maxsplit=1)[1]}")
+
+        partition_exp = "{0}{1}".format(partition_by_range, sub_partition_clause)
+
+    # Return the partitioning expression and the partition function.
+    return partition_exp, partition_fn
+
 
 def _validate_pti_copy_parameters(df, timecode_column, timebucket_duration,
                                   timezero_date, primary_time_index_name, columns_list,
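
Based on the code above, the helper should return (expression, function) pairs such as
the following; the inputs are illustrative, and the interval form of "sub_partition" is
omitted since its rendering depends on str() output for the interval types:

    >>> _build_partition_expression(partition_by='emp_id')
    ('emp_id', None)
    >>> _build_partition_expression(partition_by_case=('emp_id > 100', 'emp_id < 500'))
    ('emp_id > 100, emp_id < 500, NO CASE, UNKNOWN', 'CASE_N')
    >>> _build_partition_expression(partition_by_range='id BETWEEN 1 AND 10000',
    ...                             sub_partition=100)
    ('id BETWEEN 1 AND 10000 EACH 100', 'RANGE_N')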
@@ -1010,7 +1359,7 @@ def _validate_column_type(df, col, col_arg, expected_types, types = None, index
 
 
 def _create_table_object(df, table_name, con, primary_index, temporary, schema_name, set_table, types, index=None,
-                         index_label=None):
+                         index_label=None, partition_expression=None, partition_function=None):
     """
     This is an internal function used to construct a SQLAlchemy Table Object.
     This function checks appropriate flags and supports creation of Teradata
@@ -1041,6 +1390,12 @@ def _create_table_object(df, table_name, con, primary_index, temporary, schema_n
             When True, an attempt to create a SET table is made.
             When False, an attempt to create a MULTISET table is made.
 
+        partition_expression:
+            Specifies the partitioning expression to be used for the PARTITION BY clause.
+
+        partition_function:
+            Specifies the partitioning function to be used with the PARTITION BY clause.
+
         types:
             Specifies a python dictionary with column-name(key) to column-type(value) mapping to create DataFrames.
@@ -1097,6 +1452,11 @@ def _create_table_object(df, table_name, con, primary_index, temporary, schema_n
     else:
         pti = pti.no_primary_index()
 
+    # Apply the partitioning expression and function, if any.
+    if partition_expression:
+        pti = pti.partition_by(partition_expression=partition_expression,
+                               partition_fn=partition_function)
+
     # Create default Table construct with parameter dictionary
     table = Table(table_name, meta,
                   *(Column(col_name, col_type)
@@ -1243,6 +1603,142 @@ def _create_pti_table_object(df, con, table_name, schema_name, temporary, primar
 
     return table
 
+def _create_temporal_table(df, table_name, con, primary_index, schema_name,
+                           valid_time_columns, derived_column, types, index=None, index_label=None):
+    """
+    This is an internal function used to construct and execute a CREATE TABLE statement
+    for a Teradata temporal table. Supports creation of tables with a PERIOD FOR derived
+    column using the specified valid time columns.
+
+    PARAMETERS:
+        df:
+            Required Argument.
+            The teradataml or Pandas DataFrame object to be saved.
+            Types: pandas.DataFrame or teradataml.dataframe.dataframe.DataFrame
+
+        table_name:
+            Required Argument.
+            Name of the SQL table.
+            Types: String
+
+        con:
+            Optional Argument.
+            A SQLAlchemy connectable (engine/connection) object.
+            Types: SQLAlchemy Engine or Connection
+
+        primary_index:
+            Optional Argument.
+            Creates Teradata Table(s) with Primary index column if specified.
+            Types: String or list of Strings
+
+        schema_name:
+            Optional Argument.
+            Specifies the name of the SQL schema in the database to write to.
+            Types: String
+
+        valid_time_columns:
+            Required Argument.
+            Specifies a tuple of two column names representing the temporal validity
+            period, or the name of a single PERIOD type column.
+            Types: tuple of Strings or str
+
+        derived_column:
+            Optional Argument.
+            Specifies the name of the derived PERIOD FOR column to be created.
+            Types: String
+
+        types:
+            Optional Argument.
+            Specifies a python dictionary with column-name(key) to column-type(value) mapping to create DataFrames.
+            Types: dict
+
+        index:
+            Optional Argument.
+            Flag specifying whether to write Pandas DataFrame index as a column(s) or not.
+            Types: Boolean
+
+        index_label:
+            Optional Argument.
+            Column label(s) for index column(s).
+            Types: String or list of Strings
+
+    RETURNS:
+        None
+
+    RAISES:
+        TeradataMlException
+
+    EXAMPLES:
+        _create_temporal_table(
+            df=my_df,
+            table_name='temporal_table',
+            con=td_connection,
+            primary_index=['id'],
+            schema_name='my_schema',
+            valid_time_columns=('start_date', 'end_date'),
+            derived_column='validity_period',
+            types={'id': INTEGER, 'start_date': DATE, 'end_date': DATE},
+            index=False,
+            index_label=None
+        )
+    """
+    # Extract column names and types.
+    if isinstance(df, pd.DataFrame):
+        col_names, col_types = _extract_column_info(df, types, index, index_label)
+    else:
+        col_names, col_types = df_utils._get_column_names_and_types_from_metaexpr(df._metaexpr)
+        if types is not None:
+            col_types = [types.get(col_name, col_type) for col_name, col_type in zip(col_names, col_types)]
+
+    columns_clause_ = []
+    # Ensure all col_types are instances, not classes.
+    for i, col_type in enumerate(col_types):
+        if isinstance(col_type, type):
+            col_types[i] = col_type()
+    # Use col_names and col_types to build the columns clause.
+    # Compile column types to strings using the dialect of the current connection.
+    # Add NOT NULL to the valid time columns.
+    for col_name, col_type in zip(col_names, col_types):
+        col_def = '{} {}'.format(col_name, col_type.compile(dialect=td_dialect()))
+
+        if col_name in valid_time_columns:
+            col_def += ' NOT NULL'
+            if isinstance(col_type, (PERIOD_DATE, PERIOD_TIMESTAMP)):
+                col_def += ' AS VALIDTIME'
+        columns_clause_.append(col_def)
+
+    period_for_clause = []
+    if isinstance(valid_time_columns, tuple):
+        if derived_column is None:
+            derived_column = "_".join(valid_time_columns)
+        period_for_clause = ['PERIOD FOR {} ({}, {}) AS VALIDTIME'.format(
+            derived_column, valid_time_columns[0], valid_time_columns[1])
+        ]
+    columns_clause = ",\n ".join(columns_clause_ + period_for_clause)
+
+    # Prepare the primary index clause.
+    if primary_index:
+        primary_index_clause = "PRIMARY INDEX ({})".format(
+            ", ".join(UtilFuncs._as_list(primary_index)))
+    else:
+        primary_index_clause = ""
+
+    # Prepare the create table statement.
+    table_name = UtilFuncs._get_qualified_table_name(schema_name, table_name) if \
+        schema_name else table_name
+    sql = """
+    CREATE MULTISET TABLE {}
+    (\n{}\n)\n{}
+    """.format(table_name, columns_clause, primary_index_clause)
+    try:
+        execute_sql(sql)
+    except Exception as err:
+        raise TeradataMlException(
+            Messages.get_message(MessageCodes.TABLE_OBJECT_CREATION_FAILED) +
+            '\n' + str(err),
+            MessageCodes.TABLE_OBJECT_CREATION_FAILED
+        )
+
 
 def _rename_column(col_names, search_for, rename_to):
     """
@@ -1370,7 +1866,7 @@ def _reorder_insert_list_for_pti(df_column_list, timecode_column, sequence_colum
 
 
 def _check_columns_insertion_compatible(table1_col_object, table2_cols, is_pandas_df=False,
-                                        is_pti=False, timecode_column=None, sequence_column=None):
+                                        is_pti=False, timecode_column=None, sequence_column=None, derived_column=None):
     """
     Internal function used to extract column information from two lists of SQLAlchemy ColumnExpression objects;
     and check if the number of columns and their names are matching to determine table insertion compatibility.
@@ -1394,11 +1890,15 @@ def _check_columns_insertion_compatible(table1_col_object, table2_cols, is_panda
         timecode_column:
             timecode_column required to order the select expression for the insert.
             It should be the first column in the select expression.
-            q
+
         sequence_column:
             sequence_column required to order the select expression for the insert.
             It should be the second column in the select expression.
 
+        derived_column:
+            Specifies a derived column that is part of the table schema but not
+            part of the insert.
+            Types: String
 
     RETURNS:
         a) True, when insertion compatible (number of columns and their names match)
@@ -1410,11 +1910,16 @@ def _check_columns_insertion_compatible(table1_col_object, table2_cols, is_panda
     EXAMPLES:
         _check_columns_insertion_compatible(table1.c, ['co1', 'col2'], False)
         _check_columns_insertion_compatible(table1.c, (['co1', 'col2'], [int, str]), True, True, 'ts', 'seq')
+        _check_columns_insertion_compatible(table1.c, (['co1', 'col2'], [int, str]), True, True, 'ts', 'seq', 'derived_col')
 
     """
     table1_col_names, _ = UtilFuncs._extract_table_object_column_info(table1_col_object)
     table2_col_names = table2_cols[0] if is_pandas_df else table2_cols
 
+    # Remove derived_column from table1_col_names if specified.
+    if derived_column is not None and derived_column in table1_col_names:
+        table1_col_names.remove(derived_column)
+
     # Check for number of columns
     if len(table1_col_names) != len(table2_col_names):
         return False
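
A sketch of the effect of the new "derived_column" argument, using hypothetical inputs:
the derived PERIOD FOR column exists only in the target table, so it is removed from
the target's column list before the name-by-name comparison:

    >>> # Target table columns: id, start_ts, end_ts, validity (derived).
    >>> _check_columns_insertion_compatible(table1.c, ['id', 'start_ts', 'end_ts'],
    ...                                     derived_column='validity')
    True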
@@ -1783,3 +2288,158 @@ def _validate_timezero_date(timezero_date):
 
     # Looks like the value is valid
     return True
+
+def _validate_valid_time_columns(df, valid_time_columns, derived_column=None, types=None):
+    """
+    Internal function to validate that the columns specified in "valid_time_columns"
+    exist in the DataFrame, are of type DATE or TIMESTAMP, and are of the same type.
+    Also checks that "derived_column", if specified, is not present in the DataFrame.
+
+    PARAMETERS:
+        df:
+            Required Argument.
+            Specifies the Pandas or teradataml DataFrame object to be validated.
+            Types: pandas.DataFrame or teradataml.dataframe.dataframe.DataFrame
+
+        valid_time_columns:
+            Required Argument.
+            Specifies a tuple of two column names representing the temporal validity
+            period, or the name of a single PERIOD type column.
+            Types: tuple of Strings or str
+
+        derived_column:
+            Optional Argument.
+            Specifies the name of the derived column that should not be
+            present in the DataFrame.
+            Types: String
+
+        types:
+            Optional Argument.
+            Specifies a python dictionary with column-name(key) to column-type(value)
+            mapping to create DataFrames.
+            Types: dict
+
+    RETURNS:
+        None
+
+    RAISES:
+        TeradataMlException
+
+    EXAMPLES:
+        _validate_valid_time_columns(
+            df=my_df,
+            valid_time_columns=('start_date', 'end_date'),
+            derived_column='validity_period',
+            types={'start_date': DATE, 'end_date': DATE}
+        )
+    """
+    df_columns = _get_pd_df_column_names(df) if isinstance(df, pd.DataFrame) else df.columns
+    df_dtypes = (
+        {
+            col: _get_sqlalchemy_mapping_types(str(df.dtypes[col]))
+            for col in df.dtypes.keys()
+        }
+        if isinstance(df, pd.DataFrame)
+        else df._td_column_names_and_sqlalchemy_types
+    )
+    # If the types argument is provided, override the dtypes for those columns.
+    if types is not None:
+        for col, typ in types.items():
+            if col in df_columns:
+                df_dtypes[col] = typ
+
+    if derived_column is not None and derived_column in df_columns:
+        raise TeradataMlException(
+            Messages.get_message(MessageCodes.TDMLDF_COLUMN_IN_ARG_FOUND).format(
+                derived_column, 'derived_column', 'dataframe.',
+                'Provide a value which is not part of the DataFrame columns'
+            ),
+            MessageCodes.TDMLDF_COLUMN_IN_ARG_FOUND
+        )
+    # valid_time_columns can be a tuple of two column names or a single column name.
+    if isinstance(valid_time_columns, tuple):
+        if len(valid_time_columns) != 2:
+            raise TeradataMlException(
+                Messages.get_message(MessageCodes.INVALID_ARG_VALUE).format(
+                    valid_time_columns, 'valid_time_columns', 'tuple of two column names'
+                ),
+                MessageCodes.INVALID_ARG_VALUE
+            )
+        # Check that both columns are present in the DataFrame.
+        for col in valid_time_columns:
+            if col not in df_columns:
+                raise TeradataMlException(
+                    Messages.get_message(MessageCodes.TDMLDF_COLUMN_IN_ARG_NOT_FOUND).format(
+                        col, 'valid_time_columns', 'df', 'DataFrame'
+                    ),
+                    MessageCodes.TDMLDF_COLUMN_IN_ARG_NOT_FOUND
+                )
+
+        col1_type = df_dtypes[valid_time_columns[0]]
+        col2_type = df_dtypes[valid_time_columns[1]]
+
+        # Ensure both columns are DATE or TIMESTAMP objects or classes.
+        for col_type in (col1_type, col2_type):
+            if not (
+                isinstance(col_type, (TIMESTAMP, DATE)) or
+                col_type is TIMESTAMP or col_type is DATE
+            ):
+                raise TeradataMlException(
+                    Messages.get_message(MessageCodes.INVALID_COLUMN_TYPE).format(
+                        'valid_time_columns',
+                        col_type.__name__ if isinstance(col_type, type)
+                        else col_type.__class__.__name__, 'DATE or TIMESTAMP'
+                    ),
+                    MessageCodes.INVALID_COLUMN_TYPE
+                )
+
+        # Ensure both columns are of the same type.
+        if type(col1_type) != type(col2_type):
+            raise TeradataMlException(
+                Messages.get_message(MessageCodes.INVALID_ARG_VALUE).format(
+                    valid_time_columns, 'valid_time_columns',
+                    'both columns of same type (DATE or TIMESTAMP)'
+                ),
+                MessageCodes.INVALID_ARG_VALUE
+            )
+    elif isinstance(valid_time_columns, str):
+        col = valid_time_columns
+        if col not in df_columns:
+            raise TeradataMlException(
+                Messages.get_message(MessageCodes.TDMLDF_COLUMN_IN_ARG_NOT_FOUND).format(
+                    col, 'valid_time_columns', 'df', 'DataFrame'
+                ),
+                MessageCodes.TDMLDF_COLUMN_IN_ARG_NOT_FOUND
+            )
+        col_type = df_dtypes[col]
+        # Ensure the single column is a PERIOD_DATE or PERIOD_TIMESTAMP object or class.
+        if not (
+            isinstance(col_type, (PERIOD_TIMESTAMP, PERIOD_DATE)) or
+            col_type is PERIOD_TIMESTAMP or col_type is PERIOD_DATE
+        ):
+            raise TeradataMlException(
+                Messages.get_message(MessageCodes.INVALID_COLUMN_TYPE).format(
+                    'valid_time_columns',
+                    col_type.__name__ if isinstance(col_type, type)
+                    else col_type.__class__.__name__, 'PERIOD_DATE or PERIOD_TIMESTAMP'
+                ),
+                MessageCodes.INVALID_COLUMN_TYPE
+            )
+    else:
+        raise TeradataMlException(
+            Messages.get_message(MessageCodes.INVALID_ARG_VALUE).format(
+                valid_time_columns, 'valid_time_columns',
+                'tuple of two column names or a single column name'
+            ),
+            MessageCodes.INVALID_ARG_VALUE
+        )
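
A sketch of how this validator behaves at its call site in copy_to_sql(), with
illustrative DataFrames and column names:

    >>> # Passes: both columns exist and both map to DATE.
    >>> _validate_valid_time_columns(df, ('start_date', 'end_date'))
    >>> # Raises INVALID_COLUMN_TYPE: 'description' is not DATE or TIMESTAMP.
    >>> _validate_valid_time_columns(df, ('start_date', 'description'))
    >>> # A single column name must be of PERIOD type; pass an explicit mapping in
    >>> # "types" when the column is not already PERIOD, as in example l) above.
    >>> _validate_valid_time_columns(df, 'role_validity_period',
    ...                              types={'role_validity_period': PERIOD_DATE})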