teradataml 20.0.0.4__py3-none-any.whl → 20.0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic. Click here for more details.

Files changed (107) hide show
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +86 -13
  3. teradataml/__init__.py +2 -1
  4. teradataml/_version.py +2 -2
  5. teradataml/analytics/analytic_function_executor.py +7 -12
  6. teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
  7. teradataml/analytics/sqle/__init__.py +16 -1
  8. teradataml/analytics/utils.py +15 -1
  9. teradataml/automl/__init__.py +290 -106
  10. teradataml/automl/autodataprep/__init__.py +471 -0
  11. teradataml/automl/data_preparation.py +29 -10
  12. teradataml/automl/data_transformation.py +11 -0
  13. teradataml/automl/feature_engineering.py +64 -4
  14. teradataml/automl/feature_exploration.py +639 -25
  15. teradataml/automl/model_training.py +1 -1
  16. teradataml/clients/auth_client.py +2 -2
  17. teradataml/common/constants.py +61 -26
  18. teradataml/common/messagecodes.py +2 -1
  19. teradataml/common/messages.py +5 -4
  20. teradataml/common/utils.py +255 -37
  21. teradataml/context/context.py +225 -87
  22. teradataml/data/apriori_example.json +22 -0
  23. teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
  24. teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
  25. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
  26. teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
  27. teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
  28. teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
  29. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
  30. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
  31. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
  32. teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
  33. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
  34. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
  35. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
  36. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
  37. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
  38. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
  39. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
  40. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  41. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
  42. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
  43. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
  44. teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
  45. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  46. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
  47. teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
  48. teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
  49. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  50. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
  51. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
  52. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
  53. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
  54. teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
  55. teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
  56. teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
  57. teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
  58. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
  59. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
  60. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
  61. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
  62. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
  63. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
  64. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
  65. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
  66. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
  67. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
  68. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
  69. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
  70. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +2 -2
  71. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +1 -1
  72. teradataml/data/ner_dict.csv +8 -0
  73. teradataml/data/ner_input_eng.csv +7 -0
  74. teradataml/data/ner_rule.csv +5 -0
  75. teradataml/data/pos_input.csv +40 -0
  76. teradataml/data/tdnerextractor_example.json +14 -0
  77. teradataml/data/teradataml_example.json +13 -0
  78. teradataml/data/textmorph_example.json +5 -0
  79. teradataml/data/to_num_data.csv +4 -0
  80. teradataml/data/tochar_data.csv +5 -0
  81. teradataml/data/trans_dense.csv +16 -0
  82. teradataml/data/trans_sparse.csv +55 -0
  83. teradataml/dataframe/copy_to.py +37 -26
  84. teradataml/dataframe/data_transfer.py +61 -45
  85. teradataml/dataframe/dataframe.py +130 -50
  86. teradataml/dataframe/dataframe_utils.py +15 -2
  87. teradataml/dataframe/functions.py +109 -9
  88. teradataml/dataframe/sql.py +328 -76
  89. teradataml/dbutils/dbutils.py +33 -13
  90. teradataml/dbutils/filemgr.py +14 -10
  91. teradataml/lib/aed_0_1.dll +0 -0
  92. teradataml/opensource/_base.py +6 -157
  93. teradataml/options/configure.py +4 -5
  94. teradataml/scriptmgmt/UserEnv.py +305 -38
  95. teradataml/scriptmgmt/lls_utils.py +376 -130
  96. teradataml/store/__init__.py +1 -1
  97. teradataml/table_operators/Apply.py +16 -1
  98. teradataml/table_operators/Script.py +20 -1
  99. teradataml/table_operators/table_operator_util.py +58 -9
  100. teradataml/utils/dtypes.py +2 -1
  101. teradataml/utils/internal_buffer.py +22 -2
  102. teradataml/utils/validators.py +313 -57
  103. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.5.dist-info}/METADATA +89 -14
  104. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.5.dist-info}/RECORD +107 -77
  105. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.5.dist-info}/WHEEL +0 -0
  106. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.5.dist-info}/top_level.txt +0 -0
  107. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.5.dist-info}/zip-safe +0 -0
@@ -5480,6 +5480,7 @@ class _SQLColumnExpression(_LogicalColumnExpression,
5480
5480
  self._quotechar = kw.get("quotechar", None)
5481
5481
  self._udf_script = kw.get("udf_script", None)
5482
5482
  self.alias_name = self.compile() if (self._udf or self._udf_script) is None else None
5483
+ self._debug = kw.get("debug", False)
5483
5484
 
5484
5485
  @property
5485
5486
  def expression(self):
@@ -7002,12 +7003,12 @@ class _SQLColumnExpression(_LogicalColumnExpression,
7002
7003
  Specifies starting position to extract string from column.
7003
7004
  Note:
7004
7005
  Index position starts with 1 instead of 0.
7005
- Types: int
7006
+ Types: int OR ColumnExpression
7006
7007
 
7007
7008
  length:
7008
7009
  Required Argument.
7009
7010
  Specifies the length of the string to extract from column.
7010
- Types: int
7011
+ Types: int OR ColumnExpression
7011
7012
 
7012
7013
  RETURNS:
7013
7014
  ColumnExpression.
@@ -7040,8 +7041,20 @@ class _SQLColumnExpression(_LogicalColumnExpression,
7040
7041
  emp_name mgr_id mgr_name new_col
7041
7042
  emp_id
7042
7043
  500 Fred 400 Kim on
7044
+
7045
+ # Example 3: Create a new column by passing ColumnExpression as
7046
+ # start_pos and length.
7047
+ >>> df.assign(new_column = df.emp_name.substr(df.emp_id, df.mgr_id))
7048
+ emp_name mgr_id mgr_name new_column
7049
+ emp_id
7050
+ 1 Pat 2 Don Pa
7051
+
7043
7052
  """
7044
- return _SQLColumnExpression(func.substr(self.expression, start_pos, length),
7053
+ # Handle cases where start_pos or length are ColumnExpressions.
7054
+ start_pos_expr = start_pos.expression if isinstance(start_pos, _SQLColumnExpression) else start_pos
7055
+ length_expr = length.expression if isinstance(length, _SQLColumnExpression) else length
7056
+
7057
+ return _SQLColumnExpression(func.substr(self.expression, start_pos_expr, length_expr),
7045
7058
  type=self.type)
7046
7059
 
7047
7060
  @collect_queryband(queryband="DFC_replace")
@@ -8954,8 +8967,8 @@ class _SQLColumnExpression(_LogicalColumnExpression,
8954
8967
  PARAMETERS:
8955
8968
  formatter:
8956
8969
  Optional Argument.
8957
- Specifies the string to format the values of a column.
8958
- Type: str
8970
+ Specifies the format for formatting the values of the column.
8971
+ Type: str OR ColumnExpression
8959
8972
  Note:
8960
8973
  * If 'formatter' is omitted, numeric values are converted to a string exactly
8961
8974
  long enough to hold their significant digits.
@@ -9525,94 +9538,333 @@ class _SQLColumnExpression(_LogicalColumnExpression,
9525
9538
 
9526
9539
  EXAMPLES:
9527
9540
  # Load the data to run the example.
9528
- >>> load_example_data("uaf", "stock_data")
9541
+ >>> load_example_data("teradataml", "tochar_data")
9529
9542
 
9530
- # Create a DataFrame on 'stock_data' table.
9531
- >>> df = DataFrame("stock_data")
9543
+ # Create a DataFrame on 'tochar_data' table.
9544
+ >>> df = DataFrame("tochar_data")
9532
9545
  >>> df
9533
- seq_no timevalue magnitude
9534
- data_set_id
9535
- 556 3 19/01/16 61.080
9536
- 556 5 19/01/30 63.810
9537
- 556 6 19/02/06 63.354
9538
- 556 7 19/02/13 63.871
9539
- 556 9 19/02/27 61.490
9540
- 556 10 19/03/06 61.524
9541
- 556 8 19/02/20 61.886
9542
- 556 4 19/01/23 63.900
9543
- 556 2 19/01/09 61.617
9544
- 556 1 19/01/02 60.900
9546
+ int_col float_col date_col int_format float_format date_format
9547
+ id
9548
+ 3 1314 123.46 03/09/17 XXXX TM9 DY
9549
+ 0 1234 234.56 03/09/17 9,999 999D9 MM-DD
9550
+ 2 789 123.46 03/09/17 0999 9999.9 DAY
9551
+ 1 456 234.56 03/09/17 $999 9.9EEEE CCAD
9552
+
9545
9553
  >>> df.tdtypes
9546
- column type
9547
- data_set_id INTEGER()
9548
- seq_no INTEGER()
9549
- timevalue DATE()
9550
- magnitude FLOAT()
9551
-
9552
- # Example 1: Convert 'seq_no' column to character type.
9553
- >>> res = df.assign(seq_no = df.seq_no.to_char())
9554
+ COLUMN NAME TYPE
9555
+ id INTEGER()
9556
+ int_col INTEGER()
9557
+ float_col FLOAT()
9558
+ date_col DATE()
9559
+ int_format VARCHAR(length=20, charset='LATIN')
9560
+ float_format VARCHAR(length=20, charset='LATIN')
9561
+ date_format VARCHAR(length=20, charset='LATIN')
9562
+
9563
+ # Example 1: Convert 'int_col' column to character type.
9564
+ >>> res = df.assign(int_col = df.int_col.to_char())
9554
9565
  >>> res
9555
- seq_no timevalue magnitude
9556
- data_set_id
9557
- 556 3 19/01/16 61.080
9558
- 556 5 19/01/30 63.810
9559
- 556 6 19/02/06 63.354
9560
- 556 7 19/02/13 63.871
9561
- 556 9 19/02/27 61.490
9562
- 556 10 19/03/06 61.524
9563
- 556 8 19/02/20 61.886
9564
- 556 4 19/01/23 63.900
9565
- 556 2 19/01/09 61.617
9566
- 556 1 19/01/02 60.900
9566
+ int_col float_col date_col int_format float_format date_format
9567
+ id
9568
+ 0 1234 234.56 03/09/17 9,999 999D9 MM-DD
9569
+ 3 1314 123.46 03/09/17 XXXX TM9 DY
9570
+ 2 789 123.46 03/09/17 0999 9999.9 DAY
9571
+ 1 456 234.56 03/09/17 $999 9.9EEEE CCAD
9572
+
9567
9573
  >>> res.tdtypes
9568
- columnn type
9569
- data_set_id INTEGER()
9570
- seq_no VARCHAR()
9571
- timevalue DATE()
9572
- magnitude FLOAT()
9573
-
9574
- # Example 2: Convert "magnitude" column to character type in '$99.9' format.
9575
- >>> res = df.assign(char_column = df.magnitude.to_char('$99.9'))
9574
+ COLUMN NAME TYPE
9575
+ id INTEGER()
9576
+ int_col VARCHAR()
9577
+ float_col FLOAT()
9578
+ date_col DATE()
9579
+ int_format VARCHAR(length=20, charset='LATIN')
9580
+ float_format VARCHAR(length=20, charset='LATIN')
9581
+ date_format VARCHAR(length=20, charset='LATIN')
9582
+
9583
+ # Example 2: Convert 'float_col' column to character type in '$999.9' format.
9584
+ >>> res = df.assign(char_col = df.float_col.to_char('$999.9'))
9576
9585
  >>> res
9577
- seq_no timevalue magnitude char_column
9578
- data_set_id
9579
- 556 3 19/01/16 61.080 $61.1
9580
- 556 5 19/01/30 63.810 $63.8
9581
- 556 6 19/02/06 63.354 $63.4
9582
- 556 7 19/02/13 63.871 $63.9
9583
- 556 9 19/02/27 61.490 $61.5
9584
- 556 10 19/03/06 61.524 $61.5
9585
- 556 8 19/02/20 61.886 $61.9
9586
- 556 4 19/01/23 63.900 $63.9
9587
- 556 2 19/01/09 61.617 $61.6
9588
- 556 1 19/01/02 60.900 $60.9
9589
-
9590
- # Example 3: Convert "timevalue" column to character type in 'YYYY-DAY-MONTH' format
9591
- >>> res = df.assign(timevalue = df.timevalue.to_char('YYYY-DAY-MONTH'))
9586
+ int_col float_col date_col int_format float_format date_format char_col
9587
+ id
9588
+ 0 1234 234.56 03/09/17 9,999 999D9 MM-DD $234.6
9589
+ 3 1314 123.46 03/09/17 XXXX TM9 DY $123.5
9590
+ 2 789 123.46 03/09/17 0999 9999.9 DAY $123.5
9591
+ 1 456 234.56 03/09/17 $999 9.9EEEE CCAD $234.6
9592
+
9593
+ # Example 3: Convert 'date_col' column to character type in 'YYYY-DAY-MONTH' format
9594
+ >>> res = df.assign(char_col = df.date_col.to_char('YYYY-DAY-MONTH'))
9592
9595
  >>> res
9593
- seq_no timevalue magnitude
9594
- data_set_id
9595
- 556 3 2019-WEDNESDAY-JANUARY 61.080
9596
- 556 5 2019-WEDNESDAY-JANUARY 63.810
9597
- 556 6 2019-WEDNESDAY-FEBRUARY 63.354
9598
- 556 7 2019-WEDNESDAY-FEBRUARY 63.871
9599
- 556 9 2019-WEDNESDAY-FEBRUARY 61.490
9600
- 556 10 2019-WEDNESDAY-MARCH 61.524
9601
- 556 8 2019-WEDNESDAY-FEBRUARY 61.886
9602
- 556 4 2019-WEDNESDAY-JANUARY 63.900
9603
- 556 2 2019-WEDNESDAY-JANUARY 61.617
9604
- 556 1 2019-WEDNESDAY-JANUARY 60.900
9596
+ int_col float_col date_col int_format float_format date_format char_col
9597
+ id
9598
+ 3 1314 123.4600 03/09/17 XXXX TM9 DY 1903-THURSDAY -SEPTEMBER
9599
+ 0 1234 234.5600 03/09/17 9,999 999D9 MM-DD 1903-THURSDAY -SEPTEMBER
9600
+ 2 789 123.4600 03/09/17 0999 9999.9 DAY 1903-THURSDAY -SEPTEMBER
9601
+ 1 456 234.5600 03/09/17 $999 9.9EEEE CCAD 1903-THURSDAY -SEPTEMBER
9602
+
9603
+ # Example 4: Convert 'int_col' column to character type in 'int_format' column format.
9604
+ >>> res = df.assign(char_col = df.int_col.to_char(df.int_format))
9605
+ >>> res
9606
+ int_col float_col date_col int_format float_format date_format char_col
9607
+ id
9608
+ 0 1234 234.56 03/09/17 9,999 999D9 MM-DD 1,234
9609
+ 3 1314 123.46 03/09/17 XXXX TM9 DY 522
9610
+ 2 789 123.46 03/09/17 0999 9999.9 DAY 0789
9611
+ 1 456 234.56 03/09/17 $999 9.9EEEE CCAD $456
9612
+
9613
+ # Example 5: Convert 'float_col' column to character type in 'float_format' column format.
9614
+ >>> res = df.assign(char_col = df.float_col.to_char(df.float_format))
9615
+ >>> res
9616
+ int_col float_col date_col int_format float_format date_format char_col
9617
+ id
9618
+ 2 789 123.46 03/09/17 0999 9999.9 DAY 123.5
9619
+ 3 1314 123.46 03/09/17 XXXX TM9 DY 123.46
9620
+ 1 456 234.56 03/09/17 $999 9.9EEEE CCAD 2.3E+02
9621
+ 0 1234 234.56 03/09/17 9,999 999D9 MM-DD 234.6
9622
+
9623
+ # Example 6: Convert 'date_col' column to character type in 'date_format' column format.
9624
+ >>> res = df.assign(char_col = df.date_col.to_char(df.date_format))
9625
+ >>> res
9626
+ int_col float_col date_col int_format float_format date_format char_col
9627
+ id
9628
+ 0 1234 234.56 03/09/17 9,999 999D9 MM-DD 09-17
9629
+ 3 1314 123.46 03/09/17 XXXX TM9 DY THU
9630
+ 2 789 123.46 03/09/17 0999 9999.9 DAY THURSDAY
9631
+ 1 456 234.56 03/09/17 $999 9.9EEEE CCAD 20AD
9632
+
9605
9633
  """
9606
9634
  arg_validate = []
9607
- arg_validate.append(["formatter", formatter, True, (str), True])
9635
+ arg_validate.append(["formatter", formatter, True, (str, ColumnExpression), True])
9608
9636
 
9609
9637
  # Validate argument types
9610
9638
  _Validators._validate_function_arguments(arg_validate)
9611
9639
 
9612
9640
  _args=[self.expression]
9613
9641
  if formatter:
9642
+ formatter = formatter.expression if isinstance(formatter, ColumnExpression) else formatter
9614
9643
  _args.append(formatter)
9615
9644
  return _SQLColumnExpression(func.to_char(*_args), type=VARCHAR())
9645
+
9646
+ def to_number(self, formatter=None):
9647
+ """
9648
+ DESCRIPTION:
9649
+ Converts a string-like representation of a number to NUMBER type.
9650
+
9651
+ PARAMETERS:
9652
+ formatter:
9653
+ Optional Argument.
9654
+ Specifies a variable length string containing formatting characters
9655
+ that define the format of the columns.
9656
+ Type: str OR ColumnExpression
9657
+ Note:
9658
+ * If 'formatter' is omitted, only values that are already plain numeric strings
9659
+ are converted to NUMBER; other values yield NULL (shown as NaN in Example 1).
9660
+
9661
+ * Formatters:
9662
+ +--------------------------------------------------------------------------------------------------+
9663
+ | FORMATTER DESCRIPTION |
9664
+ +--------------------------------------------------------------------------------------------------+
9665
+ | , (comma) A comma in the specified position. |
9666
+ | A comma cannot begin a number format. |
9667
+ | A comma cannot appear to the right of a decimal |
9668
+ | character or period in a number format. |
9669
+ | Example: |
9670
+ | +-------------------------------------------------+ |
9671
+ | | data formatter result | |
9672
+ | +-------------------------------------------------+ |
9673
+ | | "1,234" "9,999" 1234 | |
9674
+ | +-------------------------------------------------+ |
9675
+ +--------------------------------------------------------------------------------------------------+
9676
+ | . (period) A decimal point. Only one allowed in a format. |
9677
+ | Example: |
9678
+ | +-------------------------------------------------+ |
9679
+ | | data formatter result | |
9680
+ | +-------------------------------------------------+ |
9681
+ | | "12.34" "99.99" 12.34 | |
9682
+ | +-------------------------------------------------+ |
9683
+ +--------------------------------------------------------------------------------------------------+
9684
+ | $ A value with a leading dollar sign. |
9685
+ | Example: |
9686
+ | +-------------------------------------------------+ |
9687
+ | | data formatter result | |
9688
+ | +-------------------------------------------------+ |
9689
+ | | "$1234" "$9999" 1234 | |
9690
+ | +-------------------------------------------------+ |
9691
+ +--------------------------------------------------------------------------------------------------+
9692
+ | 0 Leading or trailing zeros. |
9693
+ | Example: |
9694
+ | +-------------------------------------------------+ |
9695
+ | | data formatter result | |
9696
+ | +-------------------------------------------------+ |
9697
+ | | "0123" "0999" 123 | |
9698
+ | | "1230" "9990" 1230 | |
9699
+ | +-------------------------------------------------+ |
9700
+ +--------------------------------------------------------------------------------------------------+
9701
+ | 9 Specified number of digits. |
9702
+ | Leading space if positive, minus if negative. |
9703
+ | Example: |
9704
+ | +-------------------------------------------------+ |
9705
+ | | data formatter result | |
9706
+ | +-------------------------------------------------+ |
9707
+ | | "1234" "9999" 1234 | |
9708
+ | | "-1234" "9999" -1234 | |
9709
+ | +-------------------------------------------------+ |
9710
+ +--------------------------------------------------------------------------------------------------+
9711
+ | B Blanks if integer part is zero. |
9712
+ | Example: |
9713
+ | +-------------------------------------------------+ |
9714
+ | | data formatter result | |
9715
+ | +-------------------------------------------------+ |
9716
+ | | "0" "B9999" 0 | |
9717
+ | +-------------------------------------------------+ |
9718
+ +--------------------------------------------------------------------------------------------------+
9719
+ | C ISO currency symbol (from SDF ISOCurrency). |
9720
+ | Example: |
9721
+ | +-------------------------------------------------+ |
9722
+ | | data formatter result | |
9723
+ | +-------------------------------------------------+ |
9724
+ | | "USD123" "C999" 123 | |
9725
+ | +-------------------------------------------------+ |
9726
+ +--------------------------------------------------------------------------------------------------+
9727
+ | D Radix separator for non-monetary values. |
9728
+ | From SDF RadixSeparator. |
9729
+ | Example: |
9730
+ | +-------------------------------------------------+ |
9731
+ | | data formatter result | |
9732
+ | +-------------------------------------------------+ |
9733
+ | | "12.34" "99D99" 12.34 | |
9734
+ | +-------------------------------------------------+ |
9735
+ +--------------------------------------------------------------------------------------------------+
9736
+ | EEEE Scientific notation. |
9737
+ | Example: |
9738
+ | +-------------------------------------------------+ |
9739
+ | | data formatter result | |
9740
+ | +-------------------------------------------------+ |
9741
+ | | "1.2E+04" "9.9EEEE" 12000 | |
9742
+ | +-------------------------------------------------+ |
9743
+ +--------------------------------------------------------------------------------------------------+
9744
+ | G Group separator for non-monetary values. |
9745
+ | From SDF GroupSeparator. |
9746
+ | Example: |
9747
+ | +-------------------------------------------------+ |
9748
+ | | data formatter result | |
9749
+ | +-------------------------------------------------+ |
9750
+ | | "1,234,567" "9G999G999" 1234567 | |
9751
+ | +-------------------------------------------------+ |
9752
+ +--------------------------------------------------------------------------------------------------+
9753
+ | L Local currency (from SDF Currency element). |
9754
+ | Example: |
9755
+ | +-------------------------------------------------+ |
9756
+ | | data formatter result | |
9757
+ | +-------------------------------------------------+ |
9758
+ | | "$123" "L999" 123 | |
9759
+ | +-------------------------------------------------+ |
9760
+ +--------------------------------------------------------------------------------------------------+
9761
+ | MI Trailing minus sign if value is negative. |
9762
+ | Can only appear in the last position. |
9763
+ | Example: |
9764
+ | +-------------------------------------------------+ |
9765
+ | | data formatter result | |
9766
+ | +-------------------------------------------------+ |
9767
+ | | "1234-" "9999MI" -1234 | |
9768
+ | +-------------------------------------------------+ |
9769
+ +--------------------------------------------------------------------------------------------------+
9770
+ | PR Negative value in angle brackets. |
9771
+ | Positive value with leading/trailing blank. |
9772
+ | Only in the last position. |
9773
+ | Example: |
9774
+ | +-------------------------------------------------+ |
9775
+ | | data formatter result | |
9776
+ | +-------------------------------------------------+ |
9777
+ | | " 123 " "9999PR" 123 | |
9778
+ | +-------------------------------------------------+ |
9779
+ +--------------------------------------------------------------------------------------------------+
9780
+ | S Sign indicator: + / - at beginning or end. |
9781
+ | Can only appear in first or last position. |
9782
+ | Example: |
9783
+ | +-------------------------------------------------+ |
9784
+ | | data formatter result | |
9785
+ | +-------------------------------------------------+ |
9786
+ | | "-1234" "S9999" -1234 | |
9787
+ | +-------------------------------------------------+ |
9788
+ +--------------------------------------------------------------------------------------------------+
9789
+ | U Dual currency (from SDF DualCurrency). |
9790
+ | Example: |
9791
+ | +-------------------------------------------------+ |
9792
+ | | data formatter result | |
9793
+ | +-------------------------------------------------+ |
9794
+ | | "$123" "U999" 123 | |
9795
+ | +-------------------------------------------------+ |
9796
+ +--------------------------------------------------------------------------------------------------+
9797
+ | X Hexadecimal format. |
9798
+ | Accepts only non-negative values. |
9799
+ | Must be preceded by 0 or FM. |
9800
+ | Example: |
9801
+ | +-------------------------------------------------+ |
9802
+ | | data formatter result | |
9803
+ | +-------------------------------------------------+ |
9804
+ | | "FF" "XX" 255 | |
9805
+ | +-------------------------------------------------+ |
9806
+ +--------------------------------------------------------------------------------------------------+
9807
+ RAISES:
9808
+ TypeError, ValueError, TeradataMlException
9809
+
9810
+ RETURNS:
9811
+ ColumnExpression
9812
+
9813
+ EXAMPLES:
9814
+ # Load the data to run the example.
9815
+ >>> load_example_data("teradataml", "to_num_data")
9816
+
9817
+ # Create a DataFrame on 'to_num_data' table.
9818
+ >>> df = DataFrame("to_num_data")
9819
+ >>> df
9820
+ price col_format
9821
+ $1234 $9999
9822
+ USD123 C999
9823
+ 78.12 99.99
9824
+
9825
+ # Example 1: Convert 'price' column to number type without passing any formatter.
9826
+ >>> res = df.assign(new_col=df.price.to_number())
9827
+ >>> res
9828
+ price col_format new_col
9829
+ $1234 $9999 NaN
9830
+ USD123 C999 NaN
9831
+ 78.12 99.99 78.12
9832
+
9833
+ # Example 2: Convert 'price' column to number type by passing formatter as string.
9834
+ >>> res = df.assign(new_col=df.price.to_number('99.99'))
9835
+ >>> res
9836
+ price col_format new_col
9837
+ $1234 $9999 NaN
9838
+ USD123 C999 NaN
9839
+ 78.12 99.99 78.12
9840
+
9841
+ # Example 3: Convert 'price' column to number type by passing formatter as ColumnExpression.
9842
+ >>> res = df.assign(new_col=df.price.to_number(df.col_format))
9843
+ >>> res
9844
+ price col_format new_col
9845
+ $1234 $9999 1234
9846
+ USD123 C999 123
9847
+ 78.12 99.99 78.12
9848
+
9849
+ >>> res.tdtypes
9850
+ price VARCHAR(length=20, charset='LATIN')
9851
+ col_format VARCHAR(length=20, charset='LATIN')
9852
+ new_col NUMBER()
9853
+
9854
+ """
9855
+
9856
+ arg_validate = []
9857
+ arg_validate.append(["formatter", formatter, True, (str, ColumnExpression), True])
9858
+
9859
+ # Validate argument types
9860
+ _Validators._validate_function_arguments(arg_validate)
9861
+
9862
+ _args = [self.expression]
9863
+ if formatter is not None:
9864
+ formatter = formatter.expression if isinstance(formatter, ColumnExpression) else formatter
9865
+ _args.append(formatter)
9866
+
9867
+ return _SQLColumnExpression(func.to_number(*_args), type=NUMBER())
9616
9868
 
9617
9869
  def to_date(self, formatter=None):
9618
9870
  """
@@ -475,7 +475,7 @@ def _get_select_table_kind(schema_name, table_name, table_kind, datalake_name):
475
475
  object_name_str = _convert_sql_search_string_to_regex(object_name_str)
476
476
  if object_name_str:
477
477
  name_filter = pddf['Table/View/Macro Name'].str.strip().str.match(object_name_str, na=False,
478
- flags=re.IGNORECASE)
478
+ flags=re.IGNORECASE)
479
479
  pddf = pddf[name_filter]
480
480
 
481
481
  if object_table_kind is not None:
@@ -2089,13 +2089,13 @@ def set_session_param(name, value):
2089
2089
  either ON or OFF.
2090
2090
  10. dot_notation: DEFAULT, LIST, NULL ERROR
2091
2091
  11. isolated_loading: NO, '', CONCURRENT
2092
- 12. function_trace: should be a list first item should be "mask_string" and second should be table name.
2092
+ 12. function_trace: Should be a list. First item should be "mask_string" and second should be table name.
2093
2093
  13. json_ignore_errors: ON, OFF
2094
- 14. searchuifdbpath: string in format 'database_name, user_name'
2094
+ 14. searchuifdbpath: String in format 'database_name, user_name'
2095
2095
  15. transaction_isolation_level: READ UNCOMMITTED, RU, SERIALIZABLE, SR
2096
- 16. query_band: should be a list first item should be "band_specification" and second should be either
2096
+ 16. query_band: Should be a list. First item should be "band_specification" and second should be either
2097
2097
  SESSION or TRANSACTION
2098
- 17. udfsearchpath: should be a list first item should be "database_name" and second should be "udf_name"
2098
+ 17. udfsearchpath: Should be a list. First item should be "database_name" and second should be "udf_name"
2099
2099
  Types: str or list of strings
2100
2100
 
2101
2101
  Returns:
@@ -2106,7 +2106,7 @@ def set_session_param(name, value):
2106
2106
 
2107
2107
  EXAMPLES:
2108
2108
  # Example 1: Set time zone offset for the session as the system default.
2109
- >>> set_session_param('timezone', "'LOCAL'")
2109
+ >>> set_session_param('timezone', 'LOCAL')
2110
2110
  True
2111
2111
 
2112
2112
  # Example 2: Set time zone to "AMERICA PACIFIC".
@@ -2195,10 +2195,18 @@ def set_session_param(name, value):
2195
2195
  """
2196
2196
  # Validate argument types
2197
2197
  function_args = []
2198
- function_args.append(["name", name, True, str, True])
2199
- function_args.append(["value", value, True, (int, str, float, list), False])
2198
+ function_args.append(["name", name, False, str, True])
2199
+ function_args.append(["value", value, False, (int, str, float, list), False])
2200
2200
  _Validators._validate_function_arguments(function_args)
2201
2201
 
2202
+ # Validate Permitted values for session parameter name.
2203
+ permitted_session_parameters = [key.name for key in SessionParamsSQL]
2204
+ _Validators._validate_permitted_values(arg=name,
2205
+ permitted_values=permitted_session_parameters,
2206
+ arg_name='name',
2207
+ case_insensitive=True,
2208
+ includeNone=False)
2209
+
2202
2210
  if not isinstance(value, list):
2203
2211
  value = [value]
2204
2212
 
@@ -2214,8 +2222,7 @@ def set_session_param(name, value):
2214
2222
  _InternalBuffer.add(function_name=value[0] if name.upper() == 'DEBUG_FUNCTION' else '')
2215
2223
 
2216
2224
  # Set the session parameter.
2217
- execute_sql(getattr(SessionParamsSQL, name.upper()).format(*value))
2218
-
2225
+ execute_sql(getattr(SessionParamsSQL, name.upper()).value.format(*value))
2219
2226
  return True
2220
2227
 
2221
2228
 
@@ -2241,7 +2248,7 @@ def unset_session_param(name):
2241
2248
  ValueError, teradatasql.OperationalError
2242
2249
 
2243
2250
  EXAMPLES:
2244
- # Example 1: unset session to previous time zone.
2251
+ # Example 1: Unset session's time zone to previous time zone.
2245
2252
  >>> set_session_param('timezone', "'GMT+1'")
2246
2253
  True
2247
2254
  >>> unset_session_param("timezone")
@@ -2253,6 +2260,17 @@ def unset_session_param(name):
2253
2260
  function_args.append(["name", name, True, str, True])
2254
2261
  _Validators._validate_function_arguments(function_args)
2255
2262
 
2263
+ # Validate Permitted values for session parameter name which can be unset.
2264
+ permitted_session_parameters = [key.name for key in SessionParamsPythonNames] +\
2265
+ ["character_set_unicode", "debug_function",
2266
+ "isolated_loading", "function_trace",
2267
+ "json_ignore_errors", "query_band"]
2268
+ _Validators._validate_permitted_values(arg=name,
2269
+ permitted_values=permitted_session_parameters,
2270
+ arg_name='name',
2271
+ case_insensitive=True,
2272
+ includeNone=False)
2273
+
2256
2274
  # Check whether session param is set or not first.
2257
2275
  session_params = _InternalBuffer.get('session_params')
2258
2276
  if session_params is None:
@@ -2261,6 +2279,8 @@ def unset_session_param(name):
2261
2279
  raise TeradataMlException(error_msg, msg_code)
2262
2280
  # unset_values covers the params that are not available in _InternalBuffer. It is a dictionary
2263
2281
  # with the param name as key and the value used to unset that param as value.
2282
+ # TODO: Unset for ISOLATED_LOADING should revert to previous behaviour, but we are setting it to NO.
2283
+ # This is not correct if ISOLATED_LOADING was CONCURRENT before setting it to NO.
2264
2284
  unset_values = {"CHARACTER_SET_UNICODE": "OFF", "DEBUG_FUNCTION": [_InternalBuffer.get('function_name'), "OFF"],
2265
2285
  "ISOLATED_LOADING": "NO", "FUNCTION_TRACE": "SET SESSION FUNCTION TRACE OFF",
2266
2286
  "JSON_IGNORE_ERRORS": "OFF", "QUERY_BAND": ["", "SESSION"]}
@@ -2275,9 +2295,9 @@ def unset_session_param(name):
2275
2295
  set_session_param(name, unset_values[name.upper()])
2276
2296
  return True
2277
2297
 
2278
- previous_value = "{}".format(session_params[getattr(SessionParamsPythonNames, name.upper())]) \
2298
+ previous_value = "{}".format(session_params[getattr(SessionParamsPythonNames, name.upper()).value]) \
2279
2299
  if name.upper() != 'TIMEZONE' else "'{}'".format(
2280
- session_params[getattr(SessionParamsPythonNames, name.upper())])
2300
+ session_params[getattr(SessionParamsPythonNames, name.upper()).value])
2281
2301
 
2282
2302
  if name.upper() == "ACCOUNT":
2283
2303
  previous_value = [previous_value, 'SESSION']
@@ -13,18 +13,22 @@ install_file, remove_file, replace_file.
13
13
 
14
14
  import os
15
15
  from pathlib import Path
16
+
16
17
  from sqlalchemy import func
17
- from sqlalchemy.sql.expression import text
18
- import teradataml.dataframe as tdmldf
19
- from teradataml.utils.internal_buffer import _InternalBuffer
20
18
  from teradatasql import OperationalError as SqlOperationalError
19
+
20
+ import teradataml.dataframe as tdmldf
21
+ from teradataml.common.constants import TableOperatorConstants
21
22
  from teradataml.common.exceptions import TeradataMlException
22
- from teradataml.common.messages import Messages
23
23
  from teradataml.common.messagecodes import MessageCodes
24
- from teradataml.dbutils.dbutils import _execute_stored_procedure, set_session_param
25
- from teradataml.utils.validators import _Validators
24
+ from teradataml.common.messages import Messages
25
+ from teradataml.common.utils import UtilFuncs
26
+ from teradataml.dbutils.dbutils import (_execute_stored_procedure,
27
+ set_session_param)
26
28
  from teradataml.options.configure import configure
27
- from teradataml.common.constants import TableOperatorConstants
29
+ from teradataml.utils.internal_buffer import _InternalBuffer
30
+ from teradataml.utils.validators import _Validators
31
+
28
32
 
29
33
  def install_file(file_identifier, file_path = None, file_on_client = True, is_binary = False,
30
34
  replace = False, force_replace = False, suppress_output = False):
@@ -300,15 +304,15 @@ def list_files():
300
304
  database = context._get_current_databasename()
301
305
 
302
306
  # set_session_param maintains a buffer of session parameters.
303
- # If the session parameter is not set or if setted SearchUIFDBPath is different
307
+ # If the session parameter is not set, or if the SearchUIFDBPath that is already set is different
304
308
  # from the current database, then we will set the SEARCHUIFDBPATH to the current
305
309
  # database. This will avoid setting the SEARCHUIFDBPATH multiple times.
306
310
  session_params = _InternalBuffer.get('session_params')
307
311
  if session_params is None or session_params["SearchUIFDBPath"] != database:
308
- set_session_param("SEARCHUIFDBPATH", database)
312
+ set_session_param("SEARCHUIFDBPATH", UtilFuncs._get_dialect_quoted_name(database))
309
313
 
310
314
  # Get the query to list files installed in Vantage.
311
315
  list_files_query = TableOperatorConstants.SCRIPT_LIST_FILES_QUERY.value \
312
- .format(database, configure.default_varchar_size)
316
+ .format(UtilFuncs._get_dialect_quoted_name(database), configure.default_varchar_size)
313
317
 
314
318
  return tdmldf.dataframe.DataFrame.from_query(list_files_query)
Binary file