teradataml 20.0.0.4__py3-none-any.whl → 20.0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic. Click here for more details.

Files changed (131)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +182 -13
  3. teradataml/__init__.py +2 -1
  4. teradataml/_version.py +2 -2
  5. teradataml/analytics/analytic_function_executor.py +8 -13
  6. teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
  7. teradataml/analytics/sqle/__init__.py +16 -1
  8. teradataml/analytics/utils.py +60 -1
  9. teradataml/automl/__init__.py +290 -106
  10. teradataml/automl/autodataprep/__init__.py +471 -0
  11. teradataml/automl/data_preparation.py +29 -10
  12. teradataml/automl/data_transformation.py +11 -0
  13. teradataml/automl/feature_engineering.py +64 -4
  14. teradataml/automl/feature_exploration.py +639 -25
  15. teradataml/automl/model_training.py +1 -1
  16. teradataml/clients/auth_client.py +12 -8
  17. teradataml/clients/keycloak_client.py +165 -0
  18. teradataml/common/constants.py +71 -26
  19. teradataml/common/exceptions.py +32 -0
  20. teradataml/common/messagecodes.py +28 -0
  21. teradataml/common/messages.py +13 -4
  22. teradataml/common/sqlbundle.py +3 -2
  23. teradataml/common/utils.py +345 -45
  24. teradataml/context/context.py +259 -93
  25. teradataml/data/apriori_example.json +22 -0
  26. teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
  27. teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
  28. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
  29. teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
  30. teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
  31. teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
  32. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
  33. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
  34. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
  35. teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
  36. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
  37. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
  38. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
  39. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
  40. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
  41. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
  42. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
  43. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  44. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
  45. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
  46. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
  47. teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
  48. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  49. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
  50. teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
  51. teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
  52. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  53. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
  54. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
  55. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
  56. teradataml/data/jsons/byom/onnxembeddings.json +1 -0
  57. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
  58. teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
  59. teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
  60. teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
  61. teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
  62. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
  63. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
  64. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
  65. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
  66. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
  67. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
  68. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
  69. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
  70. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
  71. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
  72. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
  73. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
  74. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +2 -2
  75. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +1 -1
  76. teradataml/data/ner_dict.csv +8 -0
  77. teradataml/data/ner_input_eng.csv +7 -0
  78. teradataml/data/ner_rule.csv +5 -0
  79. teradataml/data/pattern_matching_data.csv +11 -0
  80. teradataml/data/pos_input.csv +40 -0
  81. teradataml/data/sdk/modelops/modelops_spec.json +101737 -0
  82. teradataml/data/tdnerextractor_example.json +14 -0
  83. teradataml/data/teradataml_example.json +21 -1
  84. teradataml/data/textmorph_example.json +5 -0
  85. teradataml/data/to_num_data.csv +4 -0
  86. teradataml/data/tochar_data.csv +5 -0
  87. teradataml/data/trans_dense.csv +16 -0
  88. teradataml/data/trans_sparse.csv +55 -0
  89. teradataml/data/url_data.csv +10 -9
  90. teradataml/dataframe/copy_to.py +38 -27
  91. teradataml/dataframe/data_transfer.py +61 -45
  92. teradataml/dataframe/dataframe.py +1110 -132
  93. teradataml/dataframe/dataframe_utils.py +73 -27
  94. teradataml/dataframe/functions.py +1070 -9
  95. teradataml/dataframe/sql.py +750 -959
  96. teradataml/dbutils/dbutils.py +33 -13
  97. teradataml/dbutils/filemgr.py +14 -10
  98. teradataml/hyperparameter_tuner/utils.py +4 -2
  99. teradataml/lib/aed_0_1.dll +0 -0
  100. teradataml/opensource/_base.py +12 -157
  101. teradataml/options/configure.py +24 -9
  102. teradataml/scriptmgmt/UserEnv.py +317 -39
  103. teradataml/scriptmgmt/lls_utils.py +456 -135
  104. teradataml/sdk/README.md +79 -0
  105. teradataml/sdk/__init__.py +4 -0
  106. teradataml/sdk/_auth_modes.py +422 -0
  107. teradataml/sdk/_func_params.py +487 -0
  108. teradataml/sdk/_json_parser.py +453 -0
  109. teradataml/sdk/_openapi_spec_constants.py +249 -0
  110. teradataml/sdk/_utils.py +236 -0
  111. teradataml/sdk/api_client.py +897 -0
  112. teradataml/sdk/constants.py +62 -0
  113. teradataml/sdk/modelops/__init__.py +98 -0
  114. teradataml/sdk/modelops/_client.py +406 -0
  115. teradataml/sdk/modelops/_constants.py +304 -0
  116. teradataml/sdk/modelops/models.py +2308 -0
  117. teradataml/sdk/spinner.py +107 -0
  118. teradataml/store/__init__.py +1 -1
  119. teradataml/table_operators/Apply.py +16 -1
  120. teradataml/table_operators/Script.py +20 -1
  121. teradataml/table_operators/query_generator.py +4 -21
  122. teradataml/table_operators/table_operator_util.py +58 -9
  123. teradataml/utils/dtypes.py +4 -2
  124. teradataml/utils/internal_buffer.py +22 -2
  125. teradataml/utils/utils.py +0 -1
  126. teradataml/utils/validators.py +318 -58
  127. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/METADATA +188 -14
  128. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/RECORD +131 -84
  129. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/WHEEL +0 -0
  130. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/top_level.txt +0 -0
  131. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/zip-safe +0 -0
@@ -5480,6 +5480,7 @@ class _SQLColumnExpression(_LogicalColumnExpression,
5480
5480
  self._quotechar = kw.get("quotechar", None)
5481
5481
  self._udf_script = kw.get("udf_script", None)
5482
5482
  self.alias_name = self.compile() if (self._udf or self._udf_script) is None else None
5483
+ self._debug = kw.get("debug", False)
5483
5484
 
5484
5485
  @property
5485
5486
  def expression(self):
@@ -6737,8 +6738,42 @@ class _SQLColumnExpression(_LogicalColumnExpression,
6737
6738
  """
6738
6739
  return _SQLColumnExpression(self.expression.distinct().label(self.name))
6739
6740
 
6741
+ def _format_ilike_like_args(self, other, escape_char=None):
6742
+ """
6743
+ DESCRIPTION:
6744
+ Internal function to validate and format the arguments passed to
6745
+ 'ilike' and 'like' functions.
6746
+
6747
+ PARAMETERS:
6748
+ other:
6749
+ Required Argument.
6750
+ Specifies a string to match.
6751
+ Types: str OR ColumnExpression
6752
+
6753
+ escape_char:
6754
+ Optional Argument.
6755
+ Specifies the escape character to be used in the pattern.
6756
+ Types: str with one character
6757
+
6758
+ RETURNS:
6759
+ tuple
6760
+
6761
+ EXAMPLES:
6762
+ self._format_ilike_like_args(other='A!%', escape_char='!')
6763
+ """
6764
+ # Validate the arguments.
6765
+ arg_validate = []
6766
+ arg_validate.append(["other", other, False, (str, ColumnExpression), True])
6767
+ arg_validate.append(["escape_char", escape_char, True, (str), True])
6768
+ _Validators._validate_function_arguments(arg_validate)
6769
+
6770
+ # Format the arguments for ilike/like function.
6771
+ other = "{}".format(other.compile()) if isinstance(other, ColumnExpression) else "'{}'".format(other)
6772
+ escape = " ESCAPE '{}'".format(escape_char) if escape_char is not None else ""
6773
+ return other, escape
6774
+
6740
6775
  @collect_queryband(queryband="DFC_ilike")
6741
- def ilike(self, other):
6776
+ def ilike(self, other, escape_char=None):
6742
6777
  """
6743
6778
  DESCRIPTION:
6744
6779
  Function which is used to match the pattern.
@@ -6747,67 +6782,95 @@ class _SQLColumnExpression(_LogicalColumnExpression,
6747
6782
  other:
6748
6783
  Required Argument.
6749
6784
  Specifies a string to match. String match is case insensitive.
6750
- Types: str
6785
+ Types: str OR ColumnExpression
6786
+
6787
+ escape_char:
6788
+ Optional Argument.
6789
+ Specifies the escape character to be used in the pattern.
6790
+ Types: str with one character
6751
6791
 
6752
6792
  RETURNS:
6753
6793
  ColumnExpression.
6754
6794
 
6755
6795
  EXAMPLES:
6756
- >>> load_example_data("dataframe","admissions_train")
6757
- >>> df = DataFrame.from_table('admissions_train')
6758
- masters gpa stats programming admitted
6759
- id
6760
- 13 no 4.00 Advanced Novice 1
6761
- 26 yes 3.57 Advanced Advanced 1
6762
- 5 no 3.44 Novice Novice 0
6763
- 19 yes 1.98 Advanced Advanced 0
6764
- 15 yes 4.00 Advanced Advanced 1
6765
- 40 yes 3.95 Novice Beginner 0
6766
- 7 yes 2.33 Novice Novice 1
6767
- 22 yes 3.46 Novice Beginner 0
6768
- 36 no 3.00 Advanced Novice 0
6769
- 38 yes 2.65 Advanced Beginner 1
6770
-
6771
- # Example 1: Find out the records whose stats starts with 'A'.
6772
- >>> df = df[df.stats.ilike('a%')]
6796
+ # Load example data.
6797
+ >>> load_example_data("teradataml", "pattern_matching_data")
6798
+ >>> df = DataFrame('pattern_matching_data')
6799
+ data pattern level
6800
+ id
6801
+ 5 prod_01 prod_01% Beginner
6802
+ 8 log%2024 l_g% Beginner
6803
+ 2 user%2025 user!%% Beginner
6804
+ 6 prod%v2 prod!_% Novice
6805
+ 4 data%backup data@%% Advanced
6806
+ 10 backup_9 restore!_9 Beginner
6807
+ 7 log_file log^_file Advanced
6808
+ 1 user_Alpha user!_% Advanced
6809
+ 3 data_2024 d% Novice
6810
+ 9 temp_file temp!__% Novice
6811
+
6812
+ # Example 1: Find the records whose 'level' column value starts with 'A'.
6813
+ >>> df = df[df.level.ilike('A%')]
6773
6814
  >>> df
6774
- masters gpa stats programming admitted
6775
- id
6776
- 19 yes 1.98 Advanced Advanced 0
6777
- 15 yes 4.00 Advanced Advanced 1
6778
- 38 yes 2.65 Advanced Beginner 1
6779
- 26 yes 3.57 Advanced Advanced 1
6780
- 17 no 3.83 Advanced Advanced 1
6781
- 34 yes 3.85 Advanced Beginner 0
6782
- 13 no 4.00 Advanced Novice 1
6783
- 24 no 1.87 Advanced Novice 1
6784
- 36 no 3.00 Advanced Novice 0
6785
- 27 yes 3.96 Advanced Advanced 0
6815
+ data pattern level
6816
+ id
6817
+ 4 data%backup data@%% Advanced
6818
+ 7 log_file log^_file Advanced
6819
+ 1 user_Alpha user!_% Advanced
6786
6820
  >>>
6787
6821
 
6788
6822
  # Example 2: Create a new Column with values as -
6789
- # 1 if value of column 'stats' starts with 'a' and third letter is 'v',
6823
+ # 1 if value of column 'level' starts with 'n' and third letter is 'v',
6790
6824
  # 0 otherwise. Ignore case.
6791
- >>> df.assign(new_col = case_when((df.stats.ilike('a_v%').expression, 1), else_=0))
6792
- masters gpa stats programming admitted n
6793
- id
6794
- 13 no 4.00 Advanced Novice 1 1
6795
- 26 yes 3.57 Advanced Advanced 1 1
6796
- 5 no 3.44 Novice Novice 0 0
6797
- 19 yes 1.98 Advanced Advanced 0 1
6798
- 15 yes 4.00 Advanced Advanced 1 1
6799
- 40 yes 3.95 Novice Beginner 0 0
6800
- 7 yes 2.33 Novice Novice 1 0
6801
- 22 yes 3.46 Novice Beginner 0 0
6802
- 36 no 3.00 Advanced Novice 0 1
6803
- 38 yes 2.65 Advanced Beginner 1 1
6825
+ >>> from sqlalchemy.sql.expression import case as case_when
6826
+ >>> df.assign(new_col = case_when((df.level.ilike('n_v%').expression, 1), else_=0))
6827
+ data pattern level new_col
6828
+ id
6829
+ 3 data_2024 d% Novice 1
6830
+ 1 user_Alpha user!_% Advanced 0
6831
+ 8 log%2024 l_g% Beginner 0
6832
+ 2 user%2025 user!%% Beginner 0
6833
+ 10 backup_9 restore!_9 Beginner 0
6834
+ 9 temp_file temp!__% Novice 1
6835
+ 6 prod%v2 prod!_% Novice 1
6836
+ 5 prod_01 prod_01% Beginner 0
6837
+ 4 data%backup data@%% Advanced 0
6838
+ 7 log_file log^_file Advanced 0
6839
+ >>>
6840
+
6841
+ # Example 3: Find out the records where the value in the 'data' column
6842
+ # matches the pattern specified in the 'pattern' column.
6843
+ >>> df = df[df.data.ilike(df.pattern)]
6844
+ >>> df
6845
+ data pattern level
6846
+ id
6847
+ 3 data_2024 d% Novice
6848
+ 8 log%2024 l_g% Beginner
6849
+ 5 prod_01 prod_01% Beginner
6850
+ >>>
6851
+
6852
+ # Example 4: Find out the records where the value in the 'data' column
6853
+ # matches the pattern specified in the 'pattern' column considering the
6854
+ # escape character as '!'.
6855
+ >>> df = df[df.data.ilike(df.pattern, escape_char='!')]
6856
+ >>> df
6857
+ data pattern level
6858
+ id
6859
+ 8 log%2024 l_g% Beginner
6860
+ 9 temp_file temp!__% Novice
6861
+ 3 data_2024 d% Novice
6862
+ 2 user%2025 user!%% Beginner
6863
+ 1 user_Alpha user!_% Advanced
6864
+ 5 prod_01 prod_01% Beginner
6804
6865
  >>>
6805
6866
  """
6867
+ # Validate and format arguments
6868
+ other, escape = self._format_ilike_like_args(other, escape_char)
6806
6869
  return _SQLColumnExpression(
6807
- literal_column("{} (NOT CASESPECIFIC) LIKE '{}'".format(self.compile(), other)))
6870
+ literal_column("{} (NOT CASESPECIFIC) LIKE {}{}".format(self.compile(), other, escape)))
6808
6871
 
6809
6872
  @collect_queryband(queryband="DFC_like")
6810
- def like(self, other):
6873
+ def like(self, other, escape_char=None):
6811
6874
  """
6812
6875
  DESCRIPTION:
6813
6876
  Function which is used to match the pattern.
@@ -6815,66 +6878,190 @@ class _SQLColumnExpression(_LogicalColumnExpression,
6815
6878
  PARAMETERS:
6816
6879
  other:
6817
6880
  Required Argument.
6818
- Specifies a string to match. String match is case insensitive.
6819
- Types: str
6881
+ Specifies a string to match. String match is case sensitive.
6882
+ Types: str OR ColumnExpression
6883
+
6884
+ escape_char:
6885
+ Optional Argument.
6886
+ Specifies the escape character to be used in the pattern.
6887
+ Types: str with one character
6820
6888
 
6821
6889
  RETURNS:
6822
6890
  ColumnExpression.
6823
6891
 
6824
6892
  EXAMPLES:
6825
- >>> load_example_data("dataframe","admissions_train")
6826
- >>> df = DataFrame.from_table('admissions_train')
6827
- masters gpa stats programming admitted
6828
- id
6829
- 13 no 4.00 Advanced Novice 1
6830
- 26 yes 3.57 Advanced Advanced 1
6831
- 5 no 3.44 Novice Novice 0
6832
- 19 yes 1.98 Advanced Advanced 0
6833
- 15 yes 4.00 Advanced Advanced 1
6834
- 40 yes 3.95 Novice Beginner 0
6835
- 7 yes 2.33 Novice Novice 1
6836
- 22 yes 3.46 Novice Beginner 0
6837
- 36 no 3.00 Advanced Novice 0
6838
- 38 yes 2.65 Advanced Beginner 1
6839
-
6840
- # Example 1: Find out the records whose stats starts with 'A'.
6841
- >>> df = df[df.stats.like('A%')]
6893
+ # Load example data.
6894
+ >>> load_example_data("teradataml", "pattern_matching_data")
6895
+ >>> df = DataFrame('pattern_matching_data')
6896
+ data pattern level
6897
+ id
6898
+ 5 prod_01 prod_01% Beginner
6899
+ 8 log%2024 l_g% Beginner
6900
+ 2 user%2025 user!%% Beginner
6901
+ 6 prod%v2 prod!_% Novice
6902
+ 4 data%backup data@%% Advanced
6903
+ 10 backup_9 restore!_9 Beginner
6904
+ 7 log_file log^_file Advanced
6905
+ 1 user_Alpha user!_% Advanced
6906
+ 3 data_2024 d% Novice
6907
+ 9 temp_file temp!__% Novice
6908
+
6909
+ # Example 1: Find the records whose 'level' column value starts with 'A'.
6910
+ >>> df = df[df.level.like('A%')]
6842
6911
  >>> df
6843
- masters gpa stats programming admitted
6844
- id
6845
- 19 yes 1.98 Advanced Advanced 0
6846
- 15 yes 4.00 Advanced Advanced 1
6847
- 38 yes 2.65 Advanced Beginner 1
6848
- 26 yes 3.57 Advanced Advanced 1
6849
- 17 no 3.83 Advanced Advanced 1
6850
- 34 yes 3.85 Advanced Beginner 0
6851
- 13 no 4.00 Advanced Novice 1
6852
- 24 no 1.87 Advanced Novice 1
6853
- 36 no 3.00 Advanced Novice 0
6854
- 27 yes 3.96 Advanced Advanced 0
6912
+ data pattern level
6913
+ id
6914
+ 4 data%backup data@%% Advanced
6915
+ 7 log_file log^_file Advanced
6916
+ 1 user_Alpha user!_% Advanced
6855
6917
  >>>
6856
6918
 
6857
6919
  # Example 2: Create a new Column with values as -
6858
- # 1 if value of column 'stats' starts with 'A' and third letter is 'v',
6920
+ # 1 if value of column 'level' starts with 'N' and third letter is 'v',
6859
6921
  # 0 otherwise. Do not ignore case.
6860
- >>> df.assign(new_col = case_when((df.stats.like('A_v%').expression, 1), else_=0))
6922
+ >>> from sqlalchemy.sql.expression import case as case_when
6923
+ >>> df.assign(new_col = case_when((df.level.like('N_v%').expression, 1), else_=0))
6924
+ data pattern level new_col
6925
+ id
6926
+ 3 data_2024 d% Novice 1
6927
+ 1 user_Alpha user!_% Advanced 0
6928
+ 8 log%2024 l_g% Beginner 0
6929
+ 2 user%2025 user!%% Beginner 0
6930
+ 10 backup_9 restore!_9 Beginner 0
6931
+ 9 temp_file temp!__% Novice 1
6932
+ 6 prod%v2 prod!_% Novice 1
6933
+ 5 prod_01 prod_01% Beginner 0
6934
+ 4 data%backup data@%% Advanced 0
6935
+ 7 log_file log^_file Advanced 0
6936
+ >>>
6937
+
6938
+ # Example 3: Find out the records where the value in the 'data' column
6939
+ # matches the pattern specified in the 'pattern' column.
6940
+ >>> df = df[df.data.like(df.pattern)]
6941
+ >>> df
6942
+ data pattern level
6943
+ id
6944
+ 3 data_2024 d% Novice
6945
+ 8 log%2024 l_g% Beginner
6946
+ 5 prod_01 prod_01% Beginner
6947
+ >>>
6948
+
6949
+ # Example 4: Find out the records where the value in the 'data' column
6950
+ # matches the pattern specified in the 'pattern' column considering the
6951
+ # escape character as '!'.
6952
+ >>> df = df[df.data.like(df.pattern, escape_char='!')]
6861
6953
  >>> df
6862
- masters gpa stats programming admitted n
6863
- id
6864
- 13 no 4.00 Advanced Novice 1 1
6865
- 26 yes 3.57 Advanced Advanced 1 1
6866
- 5 no 3.44 Novice Novice 0 0
6867
- 19 yes 1.98 Advanced Advanced 0 1
6868
- 15 yes 4.00 Advanced Advanced 1 1
6869
- 40 yes 3.95 Novice Beginner 0 0
6870
- 7 yes 2.33 Novice Novice 1 0
6871
- 22 yes 3.46 Novice Beginner 0 0
6872
- 36 no 3.00 Advanced Novice 0 1
6873
- 38 yes 2.65 Advanced Beginner 1 1
6954
+ data pattern level
6955
+ id
6956
+ 8 log%2024 l_g% Beginner
6957
+ 9 temp_file temp!__% Novice
6958
+ 3 data_2024 d% Novice
6959
+ 2 user%2025 user!%% Beginner
6960
+ 1 user_Alpha user!_% Advanced
6961
+ 5 prod_01 prod_01% Beginner
6874
6962
  >>>
6875
6963
  """
6964
+ # Validate and format arguments
6965
+ other, escape = self._format_ilike_like_args(other, escape_char)
6876
6966
  return _SQLColumnExpression(
6877
- literal_column("{} (CASESPECIFIC) LIKE '{}'".format(self.compile(), other)))
6967
+ literal_column("{} (CASESPECIFIC) LIKE {}{}".format(self.compile(), other, escape)))
6968
+
6969
+ def rlike(self, pattern, case_sensitive=True):
6970
+ """
6971
+ DESCRIPTION:
6972
+ Function to match a string against a regular expression pattern.
6973
+
6974
+ PARAMETERS:
6975
+ pattern:
6976
+ Required Argument.
6977
+ Specifies a regular expression pattern to match against the column values.
6978
+ Note:
6979
+ The pattern follows POSIX regular expression syntax.
6980
+ Type: str OR ColumnExpression
6981
+
6982
+ case_sensitive:
6983
+ Optional Argument.
6984
+ Specifies whether the pattern matching is case-sensitive.
6985
+ When set to False, the pattern is matched ignoring case.
6986
+ Otherwise, the pattern is matched with case sensitivity.
6987
+ Default: True
6988
+ Type: bool
6989
+
6990
+ RAISES:
6991
+ TeradataMlException
6992
+
6993
+ RETURNS:
6994
+ ColumnExpression
6995
+
6996
+ EXAMPLES:
6997
+ >>> load_example_data("dataframe","admissions_train")
6998
+ >>> df = DataFrame("admissions_train")
6999
+ >>> df
7000
+ masters gpa stats programming admitted
7001
+ id
7002
+ 13 no 4.00 Advanced Novice 1
7003
+ 26 yes 3.57 Advanced Advanced 1
7004
+ 5 no 3.44 Novice Novice 0
7005
+ 19 yes 1.98 Advanced Advanced 0
7006
+ 15 yes 4.00 Advanced Advanced 1
7007
+ 40 yes 3.95 Novice Beginner 0
7008
+ 7 yes 2.33 Novice Novice 1
7009
+ 22 yes 3.46 Novice Beginner 0
7010
+ 36 no 3.00 Advanced Novice 0
7011
+ 38 yes 2.65 Advanced Beginner 1
7012
+
7013
+ # Example 1: Find records whose 'stats' column contains 'van'.
7014
+ >>> result = df[df.stats.rlike('.*van.*')]
7015
+ >>> result
7016
+ masters gpa stats programming admitted
7017
+ id
7018
+ 13 no 4.00 Advanced Novice 1
7019
+ 26 yes 3.57 Advanced Advanced 1
7020
+ 34 yes 3.85 Advanced Beginner 0
7021
+ 19 yes 1.98 Advanced Advanced 0
7022
+ 15 yes 4.00 Advanced Advanced 1
7023
+ 36 no 3.00 Advanced Novice 0
7024
+ 38 yes 2.65 Advanced Beginner 1
7025
+
7026
+ # Example 2: Find records whose 'stats' column ends with 'ced'.
7027
+ >>> result = df[df.stats.rlike('.*ced$')]
7028
+ >>> result
7029
+ masters gpa stats programming admitted
7030
+ id
7031
+ 34 yes 3.85 Advanced Beginner 0
7032
+ 32 yes 3.46 Advanced Beginner 0
7033
+ 11 no 3.13 Advanced Advanced 1
7034
+ 30 yes 3.79 Advanced Novice 0
7035
+ 28 no 3.93 Advanced Advanced 1
7036
+ 16 no 3.70 Advanced Advanced 1
7037
+ 14 yes 3.45 Advanced Advanced 0
7038
+
7039
+ # Example 3: Case-insensitive search for records whose 'stats' value matches 'NOVICE'.
7040
+ >>> result = df[df.stats.rlike('NOVICE', case_sensitive=False)]
7041
+ >>> result
7042
+ masters gpa stats programming admitted
7043
+ id
7044
+ 12 no 3.65 Novice Novice 1
7045
+ 40 yes 3.95 Novice Beginner 0
7046
+ 7 yes 2.33 Novice Novice 1
7047
+ 5 no 3.44 Novice Novice 0
7048
+ 22 yes 3.46 Novice Beginner 0
7049
+ 37 no 3.52 Novice Novice 1
7050
+ """
7051
+ # Validate arguments
7052
+ arg_validate = []
7053
+ arg_validate.append(["pattern", pattern, False, (str, ColumnExpression), True])
7054
+ arg_validate.append(["case_sensitive", case_sensitive, True, (bool), True])
7055
+ _Validators._validate_function_arguments(arg_validate)
7056
+
7057
+ if isinstance(pattern, ColumnExpression):
7058
+ pattern = pattern.expression
7059
+
7060
+ # Set the case sensitivity modifier based on the parameter.
7061
+ case_modifier = 'c' if case_sensitive else 'i'
7062
+ return _SQLColumnExpression(
7063
+ func.regexp_similar(self.expression, pattern, case_modifier) == 1,
7064
+ type=INTEGER())
6878
7065
 
6879
7066
  @collect_queryband(queryband="DFC_startswith")
6880
7067
  def startswith(self, other):
@@ -7002,12 +7189,12 @@ class _SQLColumnExpression(_LogicalColumnExpression,
7002
7189
  Specifies starting position to extract string from column.
7003
7190
  Note:
7004
7191
  Index position starts with 1 instead of 0.
7005
- Types: int
7192
+ Types: int OR ColumnExpression
7006
7193
 
7007
7194
  length:
7008
7195
  Required Argument.
7009
7196
  Specifies the length of the string to extract from column.
7010
- Types: int
7197
+ Types: int OR ColumnExpression
7011
7198
 
7012
7199
  RETURNS:
7013
7200
  ColumnExpression.
@@ -7040,10 +7227,218 @@ class _SQLColumnExpression(_LogicalColumnExpression,
7040
7227
  emp_name mgr_id mgr_name new_col
7041
7228
  emp_id
7042
7229
  500 Fred 400 Kim on
7230
+
7231
+ # Example 3: Create a new column by passing ColumnExpression as
7232
+ # start_pos and length.
7233
+ >>> df.assign(new_column = df.emp_name.substr(df.emp_id, df.mgr_id))
7234
+ emp_name mgr_id mgr_name new_column
7235
+ emp_id
7236
+ 1 Pat 2 Don Pa
7237
+
7043
7238
  """
7044
- return _SQLColumnExpression(func.substr(self.expression, start_pos, length),
7239
+ # Handle cases where start_pos or length are ColumnExpressions.
7240
+ start_pos_expr = start_pos.expression if isinstance(start_pos, _SQLColumnExpression) else start_pos
7241
+ length_expr = length.expression if isinstance(length, _SQLColumnExpression) else length
7242
+
7243
+ return _SQLColumnExpression(func.substr(self.expression, start_pos_expr, length_expr),
7045
7244
  type=self.type)
7046
7245
 
7246
+ def count_delimiters(self, delimiter):
7247
+ """
7248
+ DESCRIPTION:
7249
+ Function to count the total number of occurrences of a specified delimiter.
7250
+
7251
+ PARAMETERS:
7252
+ delimiter:
7253
+ Required Argument.
7254
+ Specifies the delimiter to count in the column values.
7255
+ Types: str
7256
+
7257
+ RETURNS:
7258
+ ColumnExpression.
7259
+
7260
+ EXAMPLES:
7261
+ # Load sample data
7262
+ >>> load_example_data("dataframe", "admissions_train")
7263
+ >>> df = DataFrame("admissions_train")
7264
+
7265
+ # Create a DataFrame with a column containing delimiters.
7266
+ >>> df1 = df.assign(delim_col = 'ab.c.def.g')
7267
+ >>> df1
7268
+ masters gpa stats programming admitted delim_col
7269
+ id
7270
+ 38 yes 2.65 Advanced Beginner 1 ab.c.def.g
7271
+ 7 yes 2.33 Novice Novice 1 ab.c.def.g
7272
+ 26 yes 3.57 Advanced Advanced 1 ab.c.def.g
7273
+
7274
+ # Example 1: Count the number of periods in column 'delim_col'.
7275
+ >>> res = df1.assign(dot_count = df1.delim_col.count_delimiters('.'))
7276
+ >>> res
7277
+ masters gpa stats programming admitted delim_col dot_count
7278
+ id
7279
+ 38 yes 2.65 Advanced Beginner 1 ab.c.def.g 3
7280
+ 7 yes 2.33 Novice Novice 1 ab.c.def.g 3
7281
+ 26 yes 3.57 Advanced Advanced 1 ab.c.def.g 3
7282
+
7283
+ # Example 2: Count multiple delimiters in a string.
7284
+ >>> df2 = df.assign(delim_col = 'a,b;c;d-e')
7285
+ >>> res = df2.assign(
7286
+ ... comma_count = df2.delim_col.count_delimiters(','),
7287
+ ... semicolon_count = df2.delim_col.count_delimiters(';'),
7288
+ ... colon_count = df2.delim_col.count_delimiters(':'),
7289
+ ... dash_count = df2.delim_col.count_delimiters('-')
7290
+ ... )
7291
+ >>> res
7292
+ masters gpa stats programming admitted delim_col colon_count comma_count dash_count semicolon_count
7293
+ id
7294
+ 38 yes 2.65 Advanced Beginner 1 a,b;c;d-e 0 1 1 2
7295
+ 7 yes 2.33 Novice Novice 1 a,b;c;d-e 0 1 1 2
7296
+ 26 yes 3.57 Advanced Advanced 1 a,b;c;d-e 0 1 1 2
7297
+ 5 no 3.44 Novice Novice 0 a,b;c;d-e 0 1 1 2
7298
+ """
7299
+
7300
+ # Validate arguments
7301
+ arg_validate = []
7302
+ arg_validate.append(["delimiter", delimiter, False, (str), True])
7303
+ _Validators._validate_function_arguments(arg_validate)
7304
+
7305
+ # Calculate the count by comparing the original string length
7306
+ # with the length after removing all delimiters.
7307
+ expression = (func.characters(self.expression) - func.characters(
7308
+ func.oreplace(self.expression, delimiter, '')))// func.characters(delimiter)
7309
+
7310
+ return _SQLColumnExpression(expression, type=INTEGER())
7311
+
7312
+ @collect_queryband(queryband="DFC_substringIndex")
7313
+ def substring_index(self, delimiter, count):
7314
+ """
7315
+ DESCRIPTION:
7316
+ Function to return the substring from a column before a specified
7317
+ delimiter, up to a given occurrence count.
7318
+
7319
+ PARAMETERS:
7320
+ delimiter:
7321
+ Required Argument.
7322
+ Specifies the delimiter string to split the column values.
7323
+ Types: str
7324
+
7325
+ count:
7326
+ Required Argument.
7327
+ Specifies the number of occurrences of the delimiter to consider.
7328
+ If positive, the substring is extracted from the start of the string.
7329
+ If negative, the substring is extracted from the end of the string.
7330
+ If zero, an empty string is returned.
7331
+ Types: int
7332
+
7333
+ RAISES:
7334
+ TeradataMlException
7335
+
7336
+ RETURNS:
7337
+ ColumnExpression.
7338
+
7339
+ EXAMPLES:
7340
+ # Load the data to run the example.
7341
+ >>> load_example_data("dataframe","admissions_train")
7342
+ >>> df = DataFrame('admissions_train')
7343
+
7344
+ # Create a new column 'delim_col' with string.
7345
+ >>> df1 = df.assign(delim_col = 'ab.c.def.g')
7346
+ >>> df1
7347
+ masters gpa stats programming admitted delim_col
7348
+ id
7349
+ 38 yes 2.65 Advanced Beginner 1 ab.c.def.g
7350
+ 7 yes 2.33 Novice Novice 1 ab.c.def.g
7351
+ 26 yes 3.57 Advanced Advanced 1 ab.c.def.g
7352
+ 5 no 3.44 Novice Novice 0 ab.c.def.g
7353
+ 3 no 3.70 Novice Beginner 1 ab.c.def.g
7354
+ 22 yes 3.46 Novice Beginner 0 ab.c.def.g
7355
+ 1 yes 3.95 Beginner Beginner 0 ab.c.def.g
7356
+ 17 no 3.83 Advanced Advanced 1 ab.c.def.g
7357
+ 15 yes 4.00 Advanced Advanced 1 ab.c.def.g
7358
+ 34 yes 3.85 Advanced Beginner 0 ab.c.def.g
7359
+
7360
+ # Example 1: Create a new column 'new_column' by extracting the substring
7361
+ # based on positive count.
7362
+ >>> res = df1.assign(new_column = df1.delim_col.substring_index('.', 2))
7363
+ >>> res
7364
+ masters gpa stats programming admitted delim_col new_column
7365
+ id
7366
+ 34 yes 3.85 Advanced Beginner 0 ab.c.def.g ab.c
7367
+ 32 yes 3.46 Advanced Beginner 0 ab.c.def.g ab.c
7368
+ 11 no 3.13 Advanced Advanced 1 ab.c.def.g ab.c
7369
+ 30 yes 3.79 Advanced Novice 0 ab.c.def.g ab.c
7370
+ 28 no 3.93 Advanced Advanced 1 ab.c.def.g ab.c
7371
+ 16 no 3.70 Advanced Advanced 1 ab.c.def.g ab.c
7372
+ 35 no 3.68 Novice Beginner 1 ab.c.def.g ab.c
7373
+ 40 yes 3.95 Novice Beginner 0 ab.c.def.g ab.c
7374
+ 19 yes 1.98 Advanced Advanced 0 ab.c.def.g ab.c
7375
+
7376
+ # Example 2: Create a new column 'new_column' by extracting the substring
7377
+ # based on negative count.
7378
+ >>> res = df1.assign(new_column = df1.delim_col.substring_index('.', -3))
7379
+ >>> res
7380
+ masters gpa stats programming admitted delim_col new_column
7381
+ id
7382
+ 34 yes 3.85 Advanced Beginner 0 ab.c.def.g c.def.g
7383
+ 32 yes 3.46 Advanced Beginner 0 ab.c.def.g c.def.g
7384
+ 11 no 3.13 Advanced Advanced 1 ab.c.def.g c.def.g
7385
+ 30 yes 3.79 Advanced Novice 0 ab.c.def.g c.def.g
7386
+ 28 no 3.93 Advanced Advanced 1 ab.c.def.g c.def.g
7387
+ 16 no 3.70 Advanced Advanced 1 ab.c.def.g c.def.g
7388
+ 35 no 3.68 Novice Beginner 1 ab.c.def.g c.def.g
7389
+ 40 yes 3.95 Novice Beginner 0 ab.c.def.g c.def.g
7390
+ 19 yes 1.98 Advanced Advanced 0 ab.c.def.g c.def.g
7391
+
7392
+ # Example 3: Create a new column 'new_column' by extracting the substring
7393
+ # with a 3-character delimiter based on positive count.
7394
+ >>> res = df1.assign(new_column = df1.delim_col.substring_index('c.d', 1))
7395
+ >>> res
7396
+ masters gpa stats programming admitted delim_col new_column
7397
+ id
7398
+ 34 yes 3.85 Advanced Beginner 0 ab.c.def.g ab.
7399
+ 32 yes 3.46 Advanced Beginner 0 ab.c.def.g ab.
7400
+ 11 no 3.13 Advanced Advanced 1 ab.c.def.g ab.
7401
+ 30 yes 3.79 Advanced Novice 0 ab.c.def.g ab.
7402
+ 28 no 3.93 Advanced Advanced 1 ab.c.def.g ab.
7403
+ 16 no 3.70 Advanced Advanced 1 ab.c.def.g ab.
7404
+ 35 no 3.68 Novice Beginner 1 ab.c.def.g ab.
7405
+ 40 yes 3.95 Novice Beginner 0 ab.c.def.g ab.
7406
+
7407
+ """
7408
+ # Validate arguments
7409
+ arg_validate = []
7410
+ arg_validate.append(["delimiter", delimiter, False, (str), True])
7411
+ arg_validate.append(["count", count, False, (int), True])
7412
+ _Validators._validate_function_arguments(arg_validate)
7413
+
7414
+ # Create the SQL expression for substring_index.
7415
+ if count == 0:
7416
+ return _SQLColumnExpression(literal(""), type=self.type)
7417
+
7418
+ elif count > 0:
7419
+ # For positive count, return substring before the nth occurrence.
7420
+ position = func.instr(self.expression, delimiter, 1, count)
7421
+ # Handle the case where the delimiter is not found.
7422
+ expression = case_when((position == 0, self.expression),
7423
+ else_=func.substring(self.expression, 1, position - 1))
7424
+ else:
7425
+ # For negative count, we need to find substring after the (total - |count| + 1)th delimiter
7426
+ # First, get the total number of delimiters
7427
+ total_delimiters = self.count_delimiters(delimiter).expression
7428
+
7429
+ # Calculate the position to start from (convert negative count to positive position).
7430
+ position = total_delimiters + count + 1
7431
+
7432
+ # Handle the case where the absolute negative count exceeds the total number of delimiters.
7433
+ expression = case_when((position > 0,
7434
+ # Get substring after the nth occurrence from the beginning.
7435
+ func.substring(self.expression,
7436
+ func.instr(self.expression, delimiter, 1, position) + len(delimiter),
7437
+ func.characters(self.expression))),
7438
+ else_=self.expression)
7439
+
7440
+ return _SQLColumnExpression(expression, type=self.type)
7441
+
7047
7442
  @collect_queryband(queryband="DFC_replace")
7048
7443
  def replace(self, to_replace, value=None):
7049
7444
  """
@@ -8954,568 +9349,13 @@ class _SQLColumnExpression(_LogicalColumnExpression,
8954
9349
  PARAMETERS:
8955
9350
  formatter:
8956
9351
  Optional Argument.
8957
- Specifies the string to format the values of a column.
8958
- Type: str
8959
- Note:
9352
+ Specifies the format to apply to the values of the column.
9353
+ Type: str OR ColumnExpression
9354
+ Notes:
8960
9355
  * If 'formatter' is omitted, a numeric value is converted to a string exactly
8961
9356
  long enough to hold its significant digits.
9357
+ * Get the supported formatters using the `get_formatters("CHAR")` function.
8962
9358
 
8963
- * Formatter for Numeric types:
8964
- +--------------------------------------------------------------------------------------------------+
8965
- | FORMATTER DESCRIPTION |
8966
- +--------------------------------------------------------------------------------------------------+
8967
- | , (comma) A comma in the specified position. |
8968
- | A comma cannot begin a number format. |
8969
- | A comma cannot appear to the right of a decimal |
8970
- | character or period in a number format. |
8971
- | Example: |
8972
- | +-------------------------------------------------+ |
8973
- | | data formatter result | |
8974
- | +-------------------------------------------------+ |
8975
- | | 1234 9,999 1,234 | |
8976
- | +-------------------------------------------------+ |
8977
- +--------------------------------------------------------------------------------------------------+
8978
- | . (period) A decimal point. |
8979
- | User can only specify one period in a number format. |
8980
- | Example: |
8981
- | +-------------------------------------------------+ |
8982
- | | data formatter result | |
8983
- | +-------------------------------------------------+ |
8984
- | | 123.46 9999.9 123.5 | |
8985
- | +-------------------------------------------------+ |
8986
- +--------------------------------------------------------------------------------------------------+
8987
- | $ A value with a leading dollar sign. |
8988
- | Example: |
8989
- | +-------------------------------------------------+ |
8990
- | | data formatter result | |
8991
- | +-------------------------------------------------+ |
8992
- | | 1234 $9999 $1234 | |
8993
- | +-------------------------------------------------+ |
8994
- +--------------------------------------------------------------------------------------------------+
8995
- | 0 Leading zeros. |
8996
- | Trailing zeros. |
8997
- | Example: |
8998
- | +-------------------------------------------------+ |
8999
- | | data formatter result | |
9000
- | +-------------------------------------------------+ |
9001
- | | 1234 09999 01234 | |
9002
- | +-------------------------------------------------+ |
9003
- +--------------------------------------------------------------------------------------------------+
9004
- | 9 A value with the specified number of digits with a |
9005
- | leading space if positive or with a leading minus |
9006
- | if negative. |
9007
- | Example: |
9008
- | +-------------------------------------------------+ |
9009
- | | data formatter result | |
9010
- | +-------------------------------------------------+ |
9011
- | | 1234 9999 1234 | |
9012
- | | 1234 999 #### | |
9013
- | +-------------------------------------------------+ |
9014
- +--------------------------------------------------------------------------------------------------+
9015
- | B Blank space for the integer part of a fixed point number|
9016
- | when the integer part is zero. |
9017
- | Example: |
9018
- | +-------------------------------------------------+ |
9019
- | | data formatter result | |
9020
- | +-------------------------------------------------+ |
9021
- | | 0.1234 B.999 Blank space| |
9022
- | +-------------------------------------------------+ |
9023
- +--------------------------------------------------------------------------------------------------+
9024
- | C The ISO currency symbol as specified in the ISOCurrency |
9025
- | element in the SDF file. |
9026
- | Example: |
9027
- | +-------------------------------------------------+ |
9028
- | | data formatter result | |
9029
- | +-------------------------------------------------+ |
9030
- | | 234 C999 USD234 | |
9031
- | +-------------------------------------------------+ |
9032
- +--------------------------------------------------------------------------------------------------+
9033
- | D The character that separates the integer and fractional |
9034
- | part of non-monetary values. |
9035
- | Example: |
9036
- | +-------------------------------------------------+ |
9037
- | | data formatter result | |
9038
- | +-------------------------------------------------+ |
9039
- | | 234.56 999D9 234.6 | |
9040
- | +-------------------------------------------------+ |
9041
- +--------------------------------------------------------------------------------------------------+
9042
- | EEEE A value in scientific notation. |
9043
- | Example: |
9044
- | +-------------------------------------------------+ |
9045
- | | data formatter result | |
9046
- | +-------------------------------------------------+ |
9047
- | | 234.56 9.9EEEE 2.3E+02 | |
9048
- | +-------------------------------------------------+ |
9049
- +--------------------------------------------------------------------------------------------------+
9050
- | G The character that separates groups of digits in the |
9051
- | integer part of non-monetary values. |
9052
- | +-------------------------------------------------+ |
9053
- | | data formatter result | |
9054
- | +-------------------------------------------------+ |
9055
- | | 123456 9G99G99 1,234,56 | |
9056
- | +-------------------------------------------------+ |
9057
- +--------------------------------------------------------------------------------------------------+
9058
- | L The string representing the local currency as specified |
9059
- | in the Currency element according to system settings. |
9060
- | Example: |
9061
- | +-------------------------------------------------+ |
9062
- | | data formatter result | |
9063
- | +-------------------------------------------------+ |
9064
- | | 234 L999 $234 | |
9065
- | +-------------------------------------------------+ |
9066
- +--------------------------------------------------------------------------------------------------+
9067
- | MI A trailing minus sign if the value is negative. |
9068
- | The MI format element can appear only in the last |
9069
- | position of a number format. |
9070
- | Example: |
9071
- | +-------------------------------------------------+ |
9072
- | | data formatter result | |
9073
- | +-------------------------------------------------+ |
9074
- | | -1234 9999MI 1234- | |
9075
- | +-------------------------------------------------+ |
9076
- +--------------------------------------------------------------------------------------------------+
9077
- | PR A negative value in <angle brackets>, or |
9078
- | a positive value with a leading and trailing blank. |
9079
- | The PR format element can appear only in the last |
9080
- | position of a number format. |
9081
- | Example: |
9082
- | +-------------------------------------------------+ |
9083
- | | data formatter result | |
9084
- | +-------------------------------------------------+ |
9085
- | | -1234 9999PR <1234> | |
9086
- | +-------------------------------------------------+ |
9087
- +--------------------------------------------------------------------------------------------------+
9088
- | S A negative value with a leading or trailing minus sign. |
9089
- | a positive value with a leading or trailing plus sign. |
9090
- | The S format element can appear only in the first or |
9091
- | last position of a number format. |
9092
- | Example: |
9093
- | +-------------------------------------------------+ |
9094
- | | data formatter result | |
9095
- | +-------------------------------------------------+ |
9096
- | | +1234 S9999 +1234 | |
9097
- | +-------------------------------------------------+ |
9098
- +--------------------------------------------------------------------------------------------------+
9099
- | TM (text minimum format) Returns the smallest number of |
9100
- | characters possible. This element is case insensitive. |
9101
- | TM or TM9 return the number in fixed notation unless |
9102
- | the output exceeds 64 characters. If the output exceeds |
9103
- | 64 characters, the number is returned in scientific |
9104
- | notation. |
9105
- | TME returns the number in scientific notation with the |
9106
- | smallest number of characters. |
9107
- | You cannot precede this element with an other element. |
9108
- | You can follow this element only with one 9 or one E |
9109
- | (or e), but not with any combination of these. |
9110
- | Example: |
9111
- | +-------------------------------------------------+ |
9112
- | | data formatter result | |
9113
- | +-------------------------------------------------+ |
9114
- | | 1234 TM 1234 | |
9115
- | +-------------------------------------------------+ |
9116
- +--------------------------------------------------------------------------------------------------+
9117
- | U (dual currency) The string that represents the dual |
9118
- | currency as specified in the DualCurrency element |
9119
- | according to system settings. |
9120
- | Example: |
9121
- | +-------------------------------------------------+ |
9122
- | | data formatter result | |
9123
- | +-------------------------------------------------+ |
9124
- | | 1234 U9999 $1234 | |
9125
- | +-------------------------------------------------+ |
9126
- +--------------------------------------------------------------------------------------------------+
9127
- | V A value multiplied by 10 to the n (and, if necessary, |
9128
- | rounded up), where n is the number of 9's after the V. |
9129
- | Example: |
9130
- | +-------------------------------------------------+ |
9131
- | | data formatter result | |
9132
- | +-------------------------------------------------+ |
9133
- | | 1234 9999V99 123400 | |
9134
- | +-------------------------------------------------+ |
9135
- +--------------------------------------------------------------------------------------------------+
9136
- | X The hexadecimal value of the specified number of digits.|
9137
- | If the specified number is not an integer, the function |
9138
- | will round it to an integer. |
9139
- | This element accepts only positive values or zero. |
9140
- | Negative values return an error. You can precede this |
9141
- | element only with zero (which returns leading zeros) or |
9142
- | FM. Any other elements return an error. If you do not |
9143
- | specify zero or FM, the return always has one leading |
9144
- | blank. |
9145
- | Example: |
9146
- | +-------------------------------------------------+ |
9147
- | | data formatter result | |
9148
- | +-------------------------------------------------+ |
9149
- | | 1234 XXXX 4D2 | |
9150
- | +-------------------------------------------------+ |
9151
- +--------------------------------------------------------------------------------------------------+
9152
-
9153
- * Formatter for Date types:
9154
- +--------------------------------------------------------------------------------------------------+
9155
- | FORMATTER DESCRIPTION |
9156
- +--------------------------------------------------------------------------------------------------+
9157
- | - |
9158
- | / |
9159
- | , Punctuation characters are ignored and text enclosed in |
9160
- | . quotation marks is ignored. |
9161
- | ; |
9162
- | : |
9163
- | "text" |
9164
- | Example: |
9165
- | +-------------------------------------------------+ |
9166
- | | data formatter result | |
9167
- | +-------------------------------------------------+ |
9168
- | | 03/09/17 MM-DD 09-17 | |
9169
- | +-------------------------------------------------+ |
9170
- +--------------------------------------------------------------------------------------------------+
9171
- | AD AD indicator. |
9172
- | A.D. |
9173
- | Example: |
9174
- | +-------------------------------------------------+ |
9175
- | | data formatter result | |
9176
- | +-------------------------------------------------+ |
9177
- | | 03/09/17 CCAD 21AD | |
9178
- | +-------------------------------------------------+ |
9179
- +--------------------------------------------------------------------------------------------------+
9180
- | AM Meridian indicator. |
9181
- | A.M. |
9182
- | Example: |
9183
- | +-------------------------------------------------+ |
9184
- | | data formatter result | |
9185
- | +-------------------------------------------------+ |
9186
- | | 03/09/17 CCAM 21AM | |
9187
- | +-------------------------------------------------+ |
9188
- +--------------------------------------------------------------------------------------------------+
9189
- | BC |
9190
- | B.C. BC indicator. |
9191
- | Example: |
9192
- | +-------------------------------------------------+ |
9193
- | | data formatter result | |
9194
- | +-------------------------------------------------+ |
9195
- | | 03/09/17 CCBC 21BC | |
9196
- | +-------------------------------------------------+ |
9197
- +--------------------------------------------------------------------------------------------------+
9198
- | CC Century. |
9199
- | SCC If the last 2 digits of a 4-digit year are between 01 |
9200
- | and 99 inclusive, the century is 1 greater than the |
9201
- | first 2 digits of that year. |
9202
- | If the last 2 digits of a 4-digit year are 00, the |
9203
- | century is the same as the first 2 digits of that year. |
9204
- | Example: |
9205
- | +-------------------------------------------------+ |
9206
- | | data formatter result | |
9207
- | +-------------------------------------------------+ |
9208
- | | 03/09/17 CCBC 21BC | |
9209
- | +-------------------------------------------------+ |
9210
- +--------------------------------------------------------------------------------------------------+
9211
- | D Day of week (1-7). |
9212
- | Example: |
9213
- | +-------------------------------------------------+ |
9214
- | | data formatter result | |
9215
- | +-------------------------------------------------+ |
9216
- | | 03/09/17 D 4 | |
9217
- | +-------------------------------------------------+ |
9218
- +--------------------------------------------------------------------------------------------------+
9219
- | DAY Name of day. |
9220
- | Example: |
9221
- | +-------------------------------------------------+ |
9222
- | | data formatter result | |
9223
- | +-------------------------------------------------+ |
9224
- | | 03/09/17 DAY WEDNESDAY | |
9225
- | +-------------------------------------------------+ |
9226
- +--------------------------------------------------------------------------------------------------+
9227
- | DD Day of month (1-31). |
9228
- | Example: |
9229
- | +-------------------------------------------------+ |
9230
- | | data formatter result | |
9231
- | +-------------------------------------------------+ |
9232
- | | 03/09/17 DD 17 | |
9233
- | +-------------------------------------------------+ |
9234
- +--------------------------------------------------------------------------------------------------+
9235
- | DDD Day of year (1-366). |
9236
- | Example: |
9237
- | +-------------------------------------------------+ |
9238
- | | data formatter result | |
9239
- | +-------------------------------------------------+ |
9240
- | | 03/09/17 DDD 260 | |
9241
- | +-------------------------------------------------+ |
9242
- +--------------------------------------------------------------------------------------------------+
9243
- | DL Date Long. Equivalent to the format string ‘FMDay, |
9244
- | Month FMDD, YYYY’. |
9245
- | Example: |
9246
- | +-------------------------------------------------+ |
9247
- | | data formatter result | |
9248
- | +-------------------------------------------------+ |
9249
- | | 03/09/17 DL Wednesday, September 17, 2003| |
9250
- | +-------------------------------------------------+ |
9251
- +--------------------------------------------------------------------------------------------------+
9252
- | DS Date Short. Equivalent to the format string |
9253
- | ‘FMMM/DD/YYYYFM’. |
9254
- | Example: |
9255
- | +-------------------------------------------------+ |
9256
- | | data formatter result | |
9257
- | +-------------------------------------------------+ |
9258
- | | 03/09/17 DS 9/17/2003 | |
9259
- | +-------------------------------------------------+ |
9260
- +--------------------------------------------------------------------------------------------------+
9261
- | DY abbreviated name of day. |
9262
- | Example: |
9263
- | +-------------------------------------------------+ |
9264
- | | data formatter result | |
9265
- | +-------------------------------------------------+ |
9266
- | | 03/09/17 DY WED | |
9267
- | +-------------------------------------------------+ |
9268
- +--------------------------------------------------------------------------------------------------+
9269
- | FF [1..9] Fractional seconds. |
9270
- | Use [1..9] to specify the number of fractional digits. |
9271
- | FF without any number following it prints a decimal |
9272
- | followed by digits equal to the number of fractional |
9273
- | seconds in the input data type. If the data type has no |
9274
- | fractional digits, FF prints nothing. |
9275
- | Any fractional digits beyond 6 digits are truncated. |
9276
- | Example: |
9277
- | +-------------------------------------------------+ |
9278
- | | data formatter result | |
9279
- | +-------------------------------------------------+ |
9280
- | | 2016-01-06 09:08:01.000000 FF 000000 | |
9281
- | +-------------------------------------------------+ |
9282
- +--------------------------------------------------------------------------------------------------+
9283
- | HH |
9284
- | HH12 Hour of day (1-12). |
9285
- | Example: |
9286
- | +-------------------------------------------------+ |
9287
- | | data formatter result | |
9288
- | +-------------------------------------------------+ |
9289
- | | 2016-01-06 09:08:01.000000 HH 09 | |
9290
- | +-------------------------------------------------+ |
9291
- +--------------------------------------------------------------------------------------------------+
9292
- | HH24 Hour of the day (0-23). |
9293
- | Example: |
9294
- | +-------------------------------------------------+ |
9295
- | | data formatter result | |
9296
- | +-------------------------------------------------+ |
9297
- | | 2016-01-06 09:08:01.000000 HH24 09 | |
9298
- | +-------------------------------------------------+ |
9299
- +--------------------------------------------------------------------------------------------------+
9300
- | IW Week of year (1-52 or 1-53) based on ISO model. |
9301
- | Example: |
9302
- | +-------------------------------------------------+ |
9303
- | | data formatter result | |
9304
- | +-------------------------------------------------+ |
9305
- | | 2016-01-06 09:08:01.000000 IW 01 | |
9306
- | +-------------------------------------------------+ |
9307
- +--------------------------------------------------------------------------------------------------+
9308
- | IYY |
9309
- | IY Last 3, 2, or 1 digits of ISO year. |
9310
- | I |
9311
- | Example: |
9312
- | +-------------------------------------------------+ |
9313
- | | data formatter result | |
9314
- | +-------------------------------------------------+ |
9315
- | | 2016-01-06 09:08:01.000000 IY 16 | |
9316
- | +-------------------------------------------------+ |
9317
- +--------------------------------------------------------------------------------------------------+
9318
- | IYYY 4-digit year based on the ISO standard. |
9319
- | Example: |
9320
- | +-------------------------------------------------+ |
9321
- | | data formatter result | |
9322
- | +-------------------------------------------------+ |
9323
- | | 2016-01-06 09:08:01.000000 IYYY 2016 | |
9324
- | +-------------------------------------------------+ |
9325
- +--------------------------------------------------------------------------------------------------+
9326
- | J Julian day, the number of days since January 1, 4713 BC. |
9327
- | Number specified with J must be integers. |
9328
- | Teradata uses the Gregorian calendar in calculations to |
9329
- | and from Julian Days. |
9330
- | Example: |
9331
- | +-------------------------------------------------+ |
9332
- | | data formatter result | |
9333
- | +-------------------------------------------------+ |
9334
- | | 2016-01-06 09:08:01.000000 J 2457394 | |
9335
- | +-------------------------------------------------+ |
9336
- +--------------------------------------------------------------------------------------------------+
9337
- | MI Minute (0-59). |
9338
- | Example: |
9339
- | +-------------------------------------------------+ |
9340
- | | data formatter result | |
9341
- | +-------------------------------------------------+ |
9342
- | | 2016-01-06 09:08:01.000000 MI 08 | |
9343
- | +-------------------------------------------------+ |
9344
- +--------------------------------------------------------------------------------------------------+
9345
- | MM Month (01-12). |
9346
- | Example: |
9347
- | +-------------------------------------------------+ |
9348
- | | data formatter result | |
9349
- | +-------------------------------------------------+ |
9350
- | | 2016-01-06 09:08:01.000000 MM 01 | |
9351
- | +-------------------------------------------------+ |
9352
- +--------------------------------------------------------------------------------------------------+
9353
- | MON Abbreviated name of month. |
9354
- | Example: |
9355
- | +-------------------------------------------------+ |
9356
- | | data formatter result | |
9357
- | +-------------------------------------------------+ |
9358
- | | 2016-01-06 09:08:01.000000 MON JAN | |
9359
- | +-------------------------------------------------+ |
9360
- +--------------------------------------------------------------------------------------------------+
9361
- | MONTH Name of month. |
9362
- | Example: |
9363
- | +-------------------------------------------------+ |
9364
- | | data formatter result | |
9365
- | +-------------------------------------------------+ |
9366
- | | 2016-01-06 09:08:01.000000 MONTH JANUARY | |
9367
- | +-------------------------------------------------+ |
9368
- +--------------------------------------------------------------------------------------------------+
9369
- | PM |
9370
- | P.M. Meridian indicator. |
9371
- | Example: |
9372
- | +-------------------------------------------------+ |
9373
- | | data formatter result | |
9374
- | +-------------------------------------------------+ |
9375
- | | 2016-01-06 09:08:01.000000 HHPM 09PM | |
9376
- | +-------------------------------------------------+ |
9377
- +--------------------------------------------------------------------------------------------------+
9378
- | Q Quarter of year (1, 2, 3, 4). |
9379
- | Example: |
9380
- | +-------------------------------------------------+ |
9381
- | | data formatter result | |
9382
- | +-------------------------------------------------+ |
9383
- | | 2016-01-06 09:08:01.000000 Q 1 | |
9384
- | +-------------------------------------------------+ |
9385
- +--------------------------------------------------------------------------------------------------+
9386
- | RM Roman numeral month (I - XII). |
9387
- | Example: |
9388
- | +-------------------------------------------------+ |
9389
- | | data formatter result | |
9390
- | +-------------------------------------------------+ |
9391
- | | 2016-01-06 09:08:01.000000 RM I | |
9392
- | +-------------------------------------------------+ |
9393
- +--------------------------------------------------------------------------------------------------+
9394
- | SP Spelled. Any numeric element followed by SP is spelled in|
9395
- | English words. The words are capitalized according to how|
9396
- | the element is capitalized. |
9397
- | For example: 'DDDSP' specifies all uppercase, 'DddSP' |
9398
- | specifies that the first letter is capitalized, and |
9399
- | 'dddSP' specifies all lowercase. |
9400
- | Example: |
9401
- | +-------------------------------------------------+ |
9402
- | | data formatter result | |
9403
- | +-------------------------------------------------+ |
9404
- | | 2016-01-06 09:08:01.000000 HHSP NINE | |
9405
- | +-------------------------------------------------+ |
9406
- +--------------------------------------------------------------------------------------------------+
9407
- | SS Second (0-59). |
9408
- | Example: |
9409
- | +-------------------------------------------------+ |
9410
- | | data formatter result | |
9411
- | +-------------------------------------------------+ |
9412
- | | 2016-01-06 09:08:01.000000 SS 03 | |
9413
- | +-------------------------------------------------+ |
9414
- +--------------------------------------------------------------------------------------------------+
9415
- | SSSSS Seconds past midnight (0-86399). |
9416
- | Example: |
9417
- | +-------------------------------------------------+ |
9418
- | | data formatter result | |
9419
- | +-------------------------------------------------+ |
9420
- | | 2016-01-06 09:08:01.000000 SSSSS 32883 | |
9421
- | +-------------------------------------------------+ |
9422
- +--------------------------------------------------------------------------------------------------+
9423
- | TS Time Short. Equivalent to the format string |
9424
- | 'HH:MI:SS AM'. |
9425
- | Example: |
9426
- | +-------------------------------------------------+ |
9427
- | | data formatter result | |
9428
- | +-------------------------------------------------+ |
9429
- | | 2016-01-06 09:08:01.000000 TS 09:08:01 AM | |
9430
- | +-------------------------------------------------+ |
9431
- +--------------------------------------------------------------------------------------------------+
9432
- | TZH Time zone hour. |
9433
- | Example: |
9434
- | +-------------------------------------------------+ |
9435
- | | data formatter result | |
9436
- | +-------------------------------------------------+ |
9437
- | | 2016-01-06 09:08:01.000000 TZH +00 | |
9438
- | +-------------------------------------------------+ |
9439
- +--------------------------------------------------------------------------------------------------+
9440
- | TZM Time zone minute. |
9441
- | Example: |
9442
- | +-------------------------------------------------+ |
9443
- | | data formatter result | |
9444
- | +-------------------------------------------------+ |
9445
- | | 2016-01-06 09:08:01.000000 TZM 00 | |
9446
- | +-------------------------------------------------+ |
9447
- +--------------------------------------------------------------------------------------------------+
9448
- | TZR Time zone region. Equivalent to the format string |
9449
- | 'TZH:TZM'. |
9450
- | Example: |
9451
- | +-------------------------------------------------+ |
9452
- | | data formatter result | |
9453
- | +-------------------------------------------------+ |
9454
- | | 2016-01-06 09:08:01.000000 TZR +00:00 | |
9455
- | +-------------------------------------------------+ |
9456
- +--------------------------------------------------------------------------------------------------+
9457
- | WW Week of year (1-53) where week 1 starts on the first day |
9458
- | of the year and continues to the 7th day of the year. |
9459
- | Example: |
9460
- | +-------------------------------------------------+ |
9461
- | | data formatter result | |
9462
- | +-------------------------------------------------+ |
9463
- | | 2016-01-06 09:08:01.000000 WW 01 | |
9464
- | +-------------------------------------------------+ |
9465
- +--------------------------------------------------------------------------------------------------+
9466
- | W Week of month (1-5) where week 1 starts on the first day |
9467
- | of the month and ends on the seventh. |
9468
- | Example: |
9469
- | +-------------------------------------------------+ |
9470
- | | data formatter result | |
9471
- | +-------------------------------------------------+ |
9472
- | | 2016-01-06 09:08:01.000000 W 1 | |
9473
- | +-------------------------------------------------+ |
9474
- +--------------------------------------------------------------------------------------------------+
9475
- | X Local radix character. |
9476
- | Example: |
9477
- | +-------------------------------------------------+ |
9478
- | | data formatter result | |
9479
- | +-------------------------------------------------+ |
9480
- | | 2016-01-06 09:08:01.000000 MMXYY 01.16 | |
9481
- | +-------------------------------------------------+ |
9482
- +--------------------------------------------------------------------------------------------------+
9483
- | Y,YYY Year with comma in this position. |
9484
- | Example: |
9485
- | +-------------------------------------------------+ |
9486
- | | data formatter result | |
9487
- | +-------------------------------------------------+ |
9488
- | | 2016-01-06 09:08:01.000000 Y,YYY 2,016 | |
9489
- | +-------------------------------------------------+ |
9490
- +--------------------------------------------------------------------------------------------------+
9491
- | YEAR Year, spelled out. S prefixes BC dates with a minus sign.|
9492
- | SYEAR |
9493
- | Example: |
9494
- | +-------------------------------------------------+ |
9495
- | | data formatter result | |
9496
- | +-------------------------------------------------+ |
9497
- | | 2016-01-06 09:08:01.000000 YEAR TWENTY SIXTEEN| |
9498
- | +-------------------------------------------------+ |
9499
- +--------------------------------------------------------------------------------------------------+
9500
- | YYYY |
9501
- | SYYYY 4-digit year. S prefixes BC dates with a minus sign. |
9502
- | Example: |
9503
- | +-------------------------------------------------+ |
9504
- | | data formatter result | |
9505
- | +-------------------------------------------------+ |
9506
- | | 2016-01-06 09:08:01.000000 YYYY 2016 | |
9507
- | +-------------------------------------------------+ |
9508
- +--------------------------------------------------------------------------------------------------+
9509
- | YYY Last 3, 2, or 1 digit of year. |
9510
- | YY If the current year and the specified year are both in |
9511
- | Y the range of 0-49, the date is in the current century. |
9512
- | Example: |
9513
- | +-------------------------------------------------+ |
9514
- | | data formatter result | |
9515
- | +-------------------------------------------------+ |
9516
- | | 2016-01-06 09:08:01.000000 YY 16 | |
9517
- | +-------------------------------------------------+ |
9518
- +--------------------------------------------------------------------------------------------------+
9519
9359
 
9520
9360
  RAISES:
9521
9361
  TypeError, ValueError, TeradataMlException
@@ -9525,94 +9365,188 @@ class _SQLColumnExpression(_LogicalColumnExpression,
9525
9365
 
9526
9366
  EXAMPLES:
9527
9367
  # Load the data to run the example.
9528
- >>> load_example_data("uaf", "stock_data")
9368
+ >>> load_example_data("teradataml", "tochar_data")
9529
9369
 
9530
- # Create a DataFrame on 'stock_data' table.
9531
- >>> df = DataFrame("stock_data")
9370
+ # Create a DataFrame on 'tochar_data' table.
9371
+ >>> df = DataFrame("tochar_data")
9532
9372
  >>> df
9533
- seq_no timevalue magnitude
9534
- data_set_id
9535
- 556 3 19/01/16 61.080
9536
- 556 5 19/01/30 63.810
9537
- 556 6 19/02/06 63.354
9538
- 556 7 19/02/13 63.871
9539
- 556 9 19/02/27 61.490
9540
- 556 10 19/03/06 61.524
9541
- 556 8 19/02/20 61.886
9542
- 556 4 19/01/23 63.900
9543
- 556 2 19/01/09 61.617
9544
- 556 1 19/01/02 60.900
9373
+ int_col float_col date_col int_format float_format date_format
9374
+ id
9375
+ 3 1314 123.46 03/09/17 XXXX TM9 DY
9376
+ 0 1234 234.56 03/09/17 9,999 999D9 MM-DD
9377
+ 2 789 123.46 03/09/17 0999 9999.9 DAY
9378
+ 1 456 234.56 03/09/17 $999 9.9EEEE CCAD
9379
+
9545
9380
  >>> df.tdtypes
9546
- column type
9547
- data_set_id INTEGER()
9548
- seq_no INTEGER()
9549
- timevalue DATE()
9550
- magnitude FLOAT()
9551
-
9552
- # Example 1: Convert 'seq_no' column to character type.
9553
- >>> res = df.assign(seq_no = df.seq_no.to_char())
9381
+ COLUMN NAME TYPE
9382
+ id INTEGER()
9383
+ int_col INTEGER()
9384
+ float_col FLOAT()
9385
+ date_col DATE()
9386
+ int_format VARCHAR(length=20, charset='LATIN')
9387
+ float_format VARCHAR(length=20, charset='LATIN')
9388
+ date_format VARCHAR(length=20, charset='LATIN')
9389
+
9390
+ # Example 1: Convert 'int_col' column to character type.
9391
+ >>> res = df.assign(int_col = df.int_col.to_char())
9554
9392
  >>> res
9555
- seq_no timevalue magnitude
9556
- data_set_id
9557
- 556 3 19/01/16 61.080
9558
- 556 5 19/01/30 63.810
9559
- 556 6 19/02/06 63.354
9560
- 556 7 19/02/13 63.871
9561
- 556 9 19/02/27 61.490
9562
- 556 10 19/03/06 61.524
9563
- 556 8 19/02/20 61.886
9564
- 556 4 19/01/23 63.900
9565
- 556 2 19/01/09 61.617
9566
- 556 1 19/01/02 60.900
9393
+ int_col float_col date_col int_format float_format date_format
9394
+ id
9395
+ 0 1234 234.56 03/09/17 9,999 999D9 MM-DD
9396
+ 3 1314 123.46 03/09/17 XXXX TM9 DY
9397
+ 2 789 123.46 03/09/17 0999 9999.9 DAY
9398
+ 1 456 234.56 03/09/17 $999 9.9EEEE CCAD
9399
+
9567
9400
  >>> res.tdtypes
9568
- columnn type
9569
- data_set_id INTEGER()
9570
- seq_no VARCHAR()
9571
- timevalue DATE()
9572
- magnitude FLOAT()
9573
-
9574
- # Example 2: Convert "magnitude" column to character type in '$99.9' format.
9575
- >>> res = df.assign(char_column = df.magnitude.to_char('$99.9'))
9401
+ COLUMN NAME TYPE
9402
+ id INTEGER()
9403
+ int_col VARCHAR()
9404
+ float_col FLOAT()
9405
+ date_col DATE()
9406
+ int_format VARCHAR(length=20, charset='LATIN')
9407
+ float_format VARCHAR(length=20, charset='LATIN')
9408
+ date_format VARCHAR(length=20, charset='LATIN')
9409
+
9410
+ # Example 2: Convert 'float_col' column to character type in '$999.9' format.
9411
+ >>> res = df.assign(char_col = df.float_col.to_char('$999.9'))
9576
9412
  >>> res
9577
- seq_no timevalue magnitude char_column
9578
- data_set_id
9579
- 556 3 19/01/16 61.080 $61.1
9580
- 556 5 19/01/30 63.810 $63.8
9581
- 556 6 19/02/06 63.354 $63.4
9582
- 556 7 19/02/13 63.871 $63.9
9583
- 556 9 19/02/27 61.490 $61.5
9584
- 556 10 19/03/06 61.524 $61.5
9585
- 556 8 19/02/20 61.886 $61.9
9586
- 556 4 19/01/23 63.900 $63.9
9587
- 556 2 19/01/09 61.617 $61.6
9588
- 556 1 19/01/02 60.900 $60.9
9589
-
9590
- # Example 3: Convert "timevalue" column to character type in 'YYYY-DAY-MONTH' format
9591
- >>> res = df.assign(timevalue = df.timevalue.to_char('YYYY-DAY-MONTH'))
9413
+ int_col float_col date_col int_format float_format date_format char_col
9414
+ id
9415
+ 0 1234 234.56 03/09/17 9,999 999D9 MM-DD $234.6
9416
+ 3 1314 123.46 03/09/17 XXXX TM9 DY $123.5
9417
+ 2 789 123.46 03/09/17 0999 9999.9 DAY $123.5
9418
+ 1 456 234.56 03/09/17 $999 9.9EEEE CCAD $234.6
9419
+
9420
+ # Example 3: Convert 'date_col' column to character type in 'YYYY-DAY-MONTH' format
9421
+ >>> res = df.assign(char_col = df.date_col.to_char('YYYY-DAY-MONTH'))
9592
9422
  >>> res
9593
- seq_no timevalue magnitude
9594
- data_set_id
9595
- 556 3 2019-WEDNESDAY-JANUARY 61.080
9596
- 556 5 2019-WEDNESDAY-JANUARY 63.810
9597
- 556 6 2019-WEDNESDAY-FEBRUARY 63.354
9598
- 556 7 2019-WEDNESDAY-FEBRUARY 63.871
9599
- 556 9 2019-WEDNESDAY-FEBRUARY 61.490
9600
- 556 10 2019-WEDNESDAY-MARCH 61.524
9601
- 556 8 2019-WEDNESDAY-FEBRUARY 61.886
9602
- 556 4 2019-WEDNESDAY-JANUARY 63.900
9603
- 556 2 2019-WEDNESDAY-JANUARY 61.617
9604
- 556 1 2019-WEDNESDAY-JANUARY 60.900
9423
+ int_col float_col date_col int_format float_format date_format char_col
9424
+ id
9425
+ 3 1314 123.4600 03/09/17 XXXX TM9 DY 1903-THURSDAY -SEPTEMBER
9426
+ 0 1234 234.5600 03/09/17 9,999 999D9 MM-DD 1903-THURSDAY -SEPTEMBER
9427
+ 2 789 123.4600 03/09/17 0999 9999.9 DAY 1903-THURSDAY -SEPTEMBER
9428
+ 1 456 234.5600 03/09/17 $999 9.9EEEE CCAD 1903-THURSDAY -SEPTEMBER
9429
+
9430
+ # Example 4: Convert 'int_col' column to character type in 'int_format' column format.
9431
+ >>> res = df.assign(char_col = df.int_col.to_char(df.int_format))
9432
+ >>> res
9433
+ int_col float_col date_col int_format float_format date_format char_col
9434
+ id
9435
+ 0 1234 234.56 03/09/17 9,999 999D9 MM-DD 1,234
9436
+ 3 1314 123.46 03/09/17 XXXX TM9 DY 522
9437
+ 2 789 123.46 03/09/17 0999 9999.9 DAY 0789
9438
+ 1 456 234.56 03/09/17 $999 9.9EEEE CCAD $456
9439
+
9440
+ # Example 5: Convert 'float_col' column to character type in 'float_format' column format.
9441
+ >>> res = df.assign(char_col = df.float_col.to_char(df.float_format))
9442
+ >>> res
9443
+ int_col float_col date_col int_format float_format date_format char_col
9444
+ id
9445
+ 2 789 123.46 03/09/17 0999 9999.9 DAY 123.5
9446
+ 3 1314 123.46 03/09/17 XXXX TM9 DY 123.46
9447
+ 1 456 234.56 03/09/17 $999 9.9EEEE CCAD 2.3E+02
9448
+ 0 1234 234.56 03/09/17 9,999 999D9 MM-DD 234.6
9449
+
9450
+ # Example 6: Convert 'date_col' column to character type in 'date_format' column format.
9451
+ >>> res = df.assign(char_col = df.date_col.to_char(df.date_format))
9452
+ >>> res
9453
+ int_col float_col date_col int_format float_format date_format char_col
9454
+ id
9455
+ 0 1234 234.56 03/09/17 9,999 999D9 MM-DD 09-17
9456
+ 3 1314 123.46 03/09/17 XXXX TM9 DY THU
9457
+ 2 789 123.46 03/09/17 0999 9999.9 DAY THURSDAY
9458
+ 1 456 234.56 03/09/17 $999 9.9EEEE CCAD 20AD
9459
+
9605
9460
  """
9606
9461
  arg_validate = []
9607
- arg_validate.append(["formatter", formatter, True, (str), True])
9462
+ arg_validate.append(["formatter", formatter, True, (str, ColumnExpression), True])
9608
9463
 
9609
9464
  # Validate argument types
9610
9465
  _Validators._validate_function_arguments(arg_validate)
9611
9466
 
9612
9467
  _args=[self.expression]
9613
9468
  if formatter:
9469
+ formatter = formatter.expression if isinstance(formatter, ColumnExpression) else formatter
9614
9470
  _args.append(formatter)
9615
9471
  return _SQLColumnExpression(func.to_char(*_args), type=VARCHAR())
9472
+
9473
+ def to_number(self, formatter=None):
9474
+ """
9475
+ DESCRIPTION:
9476
+ Converts a string-like representation of a number to NUMBER type.
9477
+
9478
+ PARAMETERS:
9479
+ formatter:
9480
+ Optional Argument.
9481
+ Specifies a variable length string containing formatting characters
9482
+ that define the format of the columns.
9483
+ Type: str OR ColumnExpression
9484
+ Notes:
9485
+ * If 'formatter' is omitted, only values that are already plain numeric
9486
+ strings (for example, '78.12') are converted; other values result in NaN.
9487
+ * Get the supported formatters using `get_formatters("NUMERIC")` function.
9488
+
9489
+ RAISES:
9490
+ TypeError, ValueError, TeradataMlException
9491
+
9492
+ RETURNS:
9493
+ ColumnExpression
9494
+
9495
+ EXAMPLES:
9496
+ # Load the data to run the example.
9497
+ >>> load_example_data("teradataml", "to_num_data")
9498
+
9499
+ # Create a DataFrame on 'to_num_data' table.
9500
+ >>> df = DataFrame("to_num_data")
9501
+ >>> df
9502
+ price col_format
9503
+ $1234 $9999
9504
+ USD123 C999
9505
+ 78.12 99.99
9506
+
9507
+ # Example 1: Convert 'price' column to number type without passing any formatter.
9508
+ >>> res = df.assign(new_col=df.price.to_number())
9509
+ >>> res
9510
+ price col_format new_col
9511
+ $1234 $9999 NaN
9512
+ USD123 C999 NaN
9513
+ 78.12 99.99 78.12
9514
+
9515
+ # Example 2: Convert 'price' column to number type by passing formatter as string.
9516
+ >>> res = df.assign(new_col=df.price.to_number('99.99'))
9517
+ >>> res
9518
+ price col_format new_col
9519
+ $1234 $9999 NaN
9520
+ USD123 C999 NaN
9521
+ 78.12 99.99 78.12
9522
+
9523
+ # Example 3: Convert 'price' column to number type by passing formatter as ColumnExpression.
9524
+ >>> res = df.assign(new_col=df.price.to_number(df.col_format))
9525
+ >>> res
9526
+ price col_format new_col
9527
+ $1234 $9999 1234
9528
+ USD123 C999 123
9529
+ 78.12 99.99 78.12
9530
+
9531
+ >>> res.tdtypes
9532
+ price VARCHAR(length=20, charset='LATIN')
9533
+ col_format VARCHAR(length=20, charset='LATIN')
9534
+ new_col NUMBER()
9535
+
9536
+ """
9537
+
9538
+ arg_validate = []
9539
+ arg_validate.append(["formatter", formatter, True, (str, ColumnExpression), True])
9540
+
9541
+ # Validate argument types
9542
+ _Validators._validate_function_arguments(arg_validate)
9543
+
9544
+ _args = [self.expression]
9545
+ if formatter is not None:
9546
+ formatter = formatter.expression if isinstance(formatter, ColumnExpression) else formatter
9547
+ _args.append(formatter)
9548
+
9549
+ return _SQLColumnExpression(func.to_number(*_args), type=NUMBER())
9616
9550
 
9617
9551
  def to_date(self, formatter=None):
9618
9552
  """
@@ -9625,213 +9559,10 @@ class _SQLColumnExpression(_LogicalColumnExpression,
9625
9559
  Specifies a variable length string containing formatting characters
9626
9560
  that define the format of column.
9627
9561
  Type: str
9628
- Note:
9562
+ Notes:
9629
9563
  * If "formatter" is omitted, the following default date format is used: 'YYYY-MM-DD'
9630
- * formatter for date type:
9631
- +--------------------------------------------------------------------------------------------------+
9632
- | FORMATTER DESCRIPTION |
9633
- +--------------------------------------------------------------------------------------------------+
9634
- | - |
9635
- | / |
9636
- | , Punctuation characters are ignored and text enclosed in |
9637
- | . quotation marks is ignored. |
9638
- | ; |
9639
- | : |
9640
- | "text" |
9641
- | Example: Date with value '2003-12-10' |
9642
- | +-------------------------------------------------+ |
9643
- | | data formatter value | |
9644
- | +-------------------------------------------------+ |
9645
- | | '2003-12-10' YYYY-MM-DD 03/12/10 | |
9646
- | +-------------------------------------------------+ |
9647
- +--------------------------------------------------------------------------------------------------+
9648
- | D Day of week (1-7). |
9649
- | Example: day of week with value '2' |
9650
- | +-------------------------------------------------+ |
9651
- | | data formatter value | |
9652
- | +-------------------------------------------------+ |
9653
- | | 2 D 24/01/01 | |
9654
- | +-------------------------------------------------+ |
9655
- +--------------------------------------------------------------------------------------------------+
9656
- | DAY Name of day. |
9657
- | Example: Date with value '2024-TUESDAY-01-30' |
9658
- | +-------------------------------------------------+ |
9659
- | | data formatter value | |
9660
- | +-------------------------------------------------+ |
9661
- | | 2024-TUESDAY-01-30 YYYY-DAY-MM-DD 24/01/30 | |
9662
- | +-------------------------------------------------+ |
9663
- +--------------------------------------------------------------------------------------------------+
9664
- | DD Day of month (1-31). |
9665
- | Example: Date with value '2003-10-25' |
9666
- | +-------------------------------------------------+ |
9667
- | | data formatter value | |
9668
- | +-------------------------------------------------+ |
9669
- | | 2003-10-25 YYYY-MM-DD 03/10/25 | |
9670
- | +-------------------------------------------------+ |
9671
- +--------------------------------------------------------------------------------------------------+
9672
- | DDD Day of year (1-366). |
9673
- | Example: Date with value '2024-366' |
9674
- | +-------------------------------------------------+ |
9675
- | | data formatter value | |
9676
- | +-------------------------------------------------+ |
9677
- | | 2024-366 YYYY-DDD 24/12/31 | |
9678
- | +-------------------------------------------------+ |
9679
- +--------------------------------------------------------------------------------------------------+
9680
- | DY abbreviated name of day. |
9681
- | Example: Date with value '2024-Mon-01-29' |
9682
- | +-------------------------------------------------+ |
9683
- | | data formatter value | |
9684
- | +-------------------------------------------------+ |
9685
- | | 2024-Mon-01-29 YYYY-DY-MM-DD 24/01/29 | |
9686
- | +-------------------------------------------------+ |
9687
- +--------------------------------------------------------------------------------------------------+
9688
- | HH |
9689
- | HH12 Hour of day (1-12). |
9690
- | Example: Date with value '2016-01-06 09:08:01' |
9691
- | +-------------------------------------------------+ |
9692
- | | data formatter value | |
9693
- | +-------------------------------------------------+ |
9694
- | | 2016-01-06 09:08:01 YYYY-MM-DD HH:MI:SS 6/01/06| |
9695
- | +-------------------------------------------------+ |
9696
- +--------------------------------------------------------------------------------------------------+
9697
- | HH24 Hour of the day (0-23). |
9698
- | Example: Date with value '2016-01-06 23:08:01' |
9699
- | +----------------------------------------------------+ |
9700
- | | data formatter value | |
9701
- | +----------------------------------------------------+ |
9702
- | | 2016-01-06 23:08:01 YYYY-MM-DD HH24:MI:SS 6/01/06 | |
9703
- | +----------------------------------------------------+ |
9704
- +--------------------------------------------------------------------------------------------------+
9705
- | J Julian day, the number of days since January 1, 4713 BC. |
9706
- | Number specified with J must be integers. |
9707
- | Teradata uses the Gregorian calendar in calculations to |
9708
- | and from Julian Days. |
9709
- | Example: Number of julian days with value '2457394' |
9710
- | +-------------------------------------------------+ |
9711
- | | data formatter value | |
9712
- | +-------------------------------------------------+ |
9713
- | | 2457394 J 16/01/06 | |
9714
- | +-------------------------------------------------+ |
9715
- +--------------------------------------------------------------------------------------------------+
9716
- | MI Minute (0-59). |
9717
- | Example: Date with value '2016-01-06 23:08:01' |
9718
- | +----------------------------------------------------+ |
9719
- | | data formatter value | |
9720
- | +----------------------------------------------------+ |
9721
- | | 2016-01-06 23:08:01 YYYY-MM-DD HH24:MI:SS 6/01/06 | |
9722
- | +----------------------------------------------------+ |
9723
- +--------------------------------------------------------------------------------------------------+
9724
- | MM Month (01-12). |
9725
- | Example: Date with value '2016-01-06 23:08:01' |
9726
- | +----------------------------------------------------+ |
9727
- | | data formatter value | |
9728
- | +----------------------------------------------------+ |
9729
- | | 2016-01-06 23:08:01 YYYY-MM-DD HH24:MI:SS 6/01/06 | |
9730
- | +----------------------------------------------------+ |
9731
- +--------------------------------------------------------------------------------------------------+
9732
- | MON Abbreviated name of month. |
9733
- | Example: Date with value '2016-JAN-06' |
9734
- | +----------------------------------------------------+ |
9735
- | | data formatter value | |
9736
- | +----------------------------------------------------+ |
9737
- | | 2016-JAN-06 YYYY-MON-DD 16/01/06 | |
9738
- | +----------------------------------------------------+ |
9739
- +--------------------------------------------------------------------------------------------------+
9740
- | MONTH Name of month. |
9741
- | Example: Date with value '2016-JANUARY-06' |
9742
- | +-------------------------------------------------+ |
9743
- | | data formatter value | |
9744
- | +-------------------------------------------------+ |
9745
- | | 2016-JANUARY-06 YYYY-MONTH-DD 16/01/06 | |
9746
- | +-------------------------------------------------+ |
9747
- +--------------------------------------------------------------------------------------------------+
9748
- | PM |
9749
- | P.M. Meridian indicator. |
9750
- | Example: Date with value '2016-01-06 23:08:01 PM' |
9751
- | +---------------------------------------------------------+ |
9752
- | | data formatter value | |
9753
- | +---------------------------------------------------------+ |
9754
- | | 2016-01-06 23:08:01 PM YYYY-MM-DD HH24:MI:SS PM 16/01/06| |
9755
- | +---------------------------------------------------------+ |
9756
- +--------------------------------------------------------------------------------------------------+
9757
- | RM Roman numeral month (I - XII). |
9758
- | Example: Date with value '2024-XII' |
9759
- | +-------------------------------------------------+ |
9760
- | | data formatter value | |
9761
- | +-------------------------------------------------+ |
9762
- | | 2024-XII YYYY-RM 24/12/01 | |
9763
- | +-------------------------------------------------+ |
9764
- +--------------------------------------------------------------------------------------------------+
9765
- | RR Stores 20th century dates in the 21st century using only |
9766
- | 2 digits. If the current year and the specified year are |
9767
- | both in the range of 0-49, the date is in the current |
9768
- | century. |
9769
- | Example: Date with value '2024-365, 21' |
9770
- | +-------------------------------------------------+ |
9771
- | | data formatter value | |
9772
- | +-------------------------------------------------+ |
9773
- | | 2024-365, 21 YYYY-DDD, RR 21/12/31 | |
9774
- | +-------------------------------------------------+ |
9775
- +--------------------------------------------------------------------------------------------------+
9776
- | RRRR Round year. Accepts either 4-digit or 2-digit input. |
9777
- | 2-digit input provides the same return as RR. |
9778
- | Example: Date with value '2024-365, 21' |
9779
- | +-------------------------------------------------+ |
9780
- | | data formatter value | |
9781
- | +-------------------------------------------------+ |
9782
- | | 2024-365, 21 YYYY-DDD, RRRR 24/12/31 | |
9783
- | +-------------------------------------------------+ |
9784
- +--------------------------------------------------------------------------------------------------+
9785
- | SS Second (0-59). |
9786
- | Example: Date with value '2016-01-06 23:08:01' |
9787
- | +----------------------------------------------------+ |
9788
- | | data formatter value | |
9789
- | +----------------------------------------------------+ |
9790
- | | 2016-01-06 23:08:01 YYYY-MM-DD HH24:MI:SS 6/01/06 | |
9791
- | +----------------------------------------------------+ |
9792
- +--------------------------------------------------------------------------------------------------+
9793
- | SSSSS Seconds past midnight (0-86399). |
9794
- +--------------------------------------------------------------------------------------------------+
9795
- | TZH Time zone hour. |
9796
- +--------------------------------------------------------------------------------------------------+
9797
- | TZM Time zone minute. |
9798
- +--------------------------------------------------------------------------------------------------+
9799
- | X Local radix character. |
9800
- | Example: Date with value '2024.366' |
9801
- | +-------------------------------------------------+ |
9802
- | | data formatter value | |
9803
- | +-------------------------------------------------+ |
9804
- | | 2024.366 YYYYXDDD 24/12/31 | |
9805
- | +-------------------------------------------------+ |
9806
- +--------------------------------------------------------------------------------------------------+
9807
- | Y,YYY Year with comma in this position. |
9808
- | Example: Date with value '2,024-366' |
9809
- | +-------------------------------------------------+ |
9810
- | | data formatter value | |
9811
- | +-------------------------------------------------+ |
9812
- | | 2,024-366 Y,YYY-DDD 24/12/31 | |
9813
- | +-------------------------------------------------+ |
9814
- +--------------------------------------------------------------------------------------------------+
9815
- | YYYY |
9816
- | SYYYY 4-digit year. S prefixes BC dates with a minus sign. |
9817
- | Example: Date with value '2024-366' |
9818
- | +-------------------------------------------------+ |
9819
- | | data formatter value | |
9820
- | +-------------------------------------------------+ |
9821
- | | 2024-366 YYYY-DDD 24/12/31 | |
9822
- | +-------------------------------------------------+ |
9823
- +--------------------------------------------------------------------------------------------------+
9824
- | YYY Last 3, 2, or 1 digit of year. |
9825
- | YY If the current year and the specified year are both in |
9826
- | Y the range of 0-49, the date is in the current century. |
9827
- | Example: Date with value '24-366' |
9828
- | +-------------------------------------------------+ |
9829
- | | data formatter value | |
9830
- | +-------------------------------------------------+ |
9831
- | | 24-366 YY-DDD 24/12/31 | |
9832
- | +-------------------------------------------------+ |
9833
- +--------------------------------------------------------------------------------------------------+
9834
-
9564
+ * Get the supported formatters using `get_formatters("DATE")` function.
9565
+
9835
9566
  RAISES:
9836
9567
  TypeError, ValueError, TeradataMlException
9837
9568
 
@@ -9907,7 +9638,7 @@ class _SQLColumnExpression(_LogicalColumnExpression,
9907
9638
  if formatter:
9908
9639
  _args.append(formatter)
9909
9640
  return _SQLColumnExpression(func.to_date(*_args), type=DATE())
9910
-
9641
+
9911
9642
  def trunc(self, expression=0, formatter=None):
9912
9643
  """
9913
9644
  DESCRIPTION:
@@ -10656,7 +10387,7 @@ class _SQLColumnExpression(_LogicalColumnExpression,
10656
10387
  value = value.expression if isinstance(value, _SQLColumnExpression) else value
10657
10388
  return _SQLColumnExpression(_fun(self.expression, value), type=type_)
10658
10389
 
10659
- def parse_url(self, url_part):
10390
+ def parse_url(self, url_part, key=None):
10660
10391
  """
10661
10392
  DESCRIPTION:
10662
10393
  Extracts a specific part from the URL.
@@ -10668,6 +10399,13 @@ class _SQLColumnExpression(_LogicalColumnExpression,
10668
10399
  Permitted Values: HOST, PATH, QUERY, REF, PROTOCOL, FILE, AUTHORITY, USERINFO
10669
10400
  Type: str or ColumnExpression
10670
10401
 
10402
+ key:
10403
+ Optional Argument.
10404
+ Specifies the key to be used for extracting the value from the query string.
10405
+ Note:
10406
+ * Applicable only when url_part is set to 'QUERY'.
10407
+ Type: str or ColumnExpression
10408
+
10671
10409
  Returns:
10672
10410
  ColumnExpression
10673
10411
 
@@ -10678,43 +10416,96 @@ class _SQLColumnExpression(_LogicalColumnExpression,
10678
10416
  # Create a DataFrame on 'url_data' table.
10679
10417
  >>> df = DataFrame("url_data")
10680
10418
  >>> df
10681
- urls part
10682
- id
10683
- 3 https://www.facebook.com HOST
10684
- 6 smtp://user:password@smtp.example.com:21/file.txt USERINFO
10685
- 4 https://teracloud-pod-services-pod-account-service.dummyvalu QUERY
10686
- 2 https://example.net/path4/path5/path6?query4=value4#fragment REF
10687
- 0 http://example.com:8080/path FILE
10688
- 1 ftp://example.net:21/path PATH
10689
- 5 http://pg.example.ml/path150#fragment90 AUTHORITY
10690
- 7 https://www.google.com PROTOCOL
10691
-
10692
- # Example 1: Extract components from column 'urls' using column 'part'
10419
+ urls part query_key
10420
+ id
10421
+ 3 https://www.facebook.com HOST facebook.com
10422
+ 8 http://example.com/api?query1=value1&query2=value2 QUERY query1
10423
+ 6 smtp://user:password@smtp.example.com:21/file.txt USERINFO password
10424
+ 4 https://teracloud-pod-services-pod-account-service.dummyvalu QUERY None
10425
+ 0 http://example.com:8080/path FILE path
10426
+ 2 https://example.net/path4/path5/path6?query4=value4#fragment REF fragment3
10427
+ 1 ftp://example.net:21/path PATH path
10428
+ 5 http://pg.example.ml/path150#fragment90 AUTHORITY fragment90
10429
+ 7 https://www.google.com PROTOCOL google.com
10430
+
10431
+ # Example 1: Extract components from column 'urls' using column 'part'.
10693
10432
  >>> df.assign(col = df.urls.parse_url(df.part))
10694
- urls part col
10695
- id
10696
- 3 https://www.facebook.com HOST www.facebook.com
10697
- 6 smtp://user:password@smtp.example.com:21/file.txt USERINFO user:password
10698
- 4 https://teracloud-pod-services-pod-account-service.dummyvalu QUERY None
10699
- 2 https://example.net/path4/path5/path6?query4=value4#fragment REF fragment
10700
- 0 http://example.com:8080/path FILE /path
10701
- 1 ftp://example.net:21/path PATH /path
10702
- 5 http://pg.example.ml/path150#fragment90 AUTHORITY pg.example.ml
10703
- 7 https://www.google.com PROTOCOL https
10704
- >>>
10433
+ urls part query_key col
10434
+ id
10435
+ 3 https://www.facebook.com HOST facebook.com www.facebook.com
10436
+ 8 http://example.com/api?query1=value1&query2=value2 QUERY query1 query1=value1&query2=value2
10437
+ 6 smtp://user:password@smtp.example.com:21/file.txt USERINFO password user:password
10438
+ 4 https://teracloud-pod-services-pod-account-service.dummyvalu QUERY None None
10439
+ 0 http://example.com:8080/path FILE path /path
10440
+ 2 https://example.net/path4/path5/path6?query4=value4#fragment REF fragment3 fragment
10441
+ 1 ftp://example.net:21/path PATH path /path
10442
+ 5 http://pg.example.ml/path150#fragment90 AUTHORITY fragment90 pg.example.ml
10443
+ 7 https://www.google.com PROTOCOL google.com https
10444
+
10445
+ # Example 2: Extract components from column 'urls' using 'part' and
10446
+ # 'query_key' column.
10447
+ >>> df.assign(col = df.urls.parse_url(df.part, df.query_key))
10448
+ urls part query_key col
10449
+ id
10450
+ 3 https://www.facebook.com HOST facebook.com None
10451
+ 8 http://example.com/api?query1=value1&query2=value2 QUERY query1 value1
10452
+ 6 smtp://user:password@smtp.example.com:21/file.txt USERINFO password None
10453
+ 4 https://teracloud-pod-services-pod-account-service.dummyvalu QUERY None None
10454
+ 0 http://example.com:8080/path FILE path None
10455
+ 2 https://example.net/path4/path5/path6?query4=value4#fragment REF fragment3 None
10456
+ 1 ftp://example.net:21/path PATH path None
10457
+ 5 http://pg.example.ml/path150#fragment90 AUTHORITY fragment90 None
10458
+ 7 https://www.google.com PROTOCOL google.com None
10459
+
10460
+ # Example 3: Extract the value of a query key from column 'urls' by passing 'url_part' and 'key' as strings.
10461
+ >>> df.assign(col = df.urls.parse_url('QUERY', 'query2'))
10462
+ urls part query_key col
10463
+ id
10464
+ 3 https://www.facebook.com HOST facebook.com None
10465
+ 8 http://example.com/api?query1=value1&query2=value2 QUERY query1 value2
10466
+ 6 smtp://user:password@smtp.example.com:21/file.txt USERINFO password None
10467
+ 4 https://teracloud-pod-services-pod-account-service.dummyvalu QUERY None None
10468
+ 0 http://example.com:8080/path FILE path None
10469
+ 2 https://example.net/path4/path5/path6?query4=value4#fragment REF fragment3 None
10470
+ 1 ftp://example.net:21/path PATH path None
10471
+ 5 http://pg.example.ml/path150#fragment90 AUTHORITY fragment90 None
10472
+ 7 https://www.google.com PROTOCOL google.com None
10705
10473
  """
10706
10474
 
10707
10475
  # Validating Arguments
10708
10476
  arg_type_matrix = []
10709
10477
  arg_type_matrix.append(["url_part", url_part, False, (str, ColumnExpression), True])
10478
+ arg_type_matrix.append(["key", key, True, (str, ColumnExpression), True])
10710
10479
  _Validators._validate_function_arguments(arg_type_matrix)
10711
10480
 
10481
+ # If key is provided and url_part is QUERY, then use regex to extract the value.
10482
+ if key is not None:
10483
+ query_expr = _SQLColumnExpression(func.regexp_substr(func.regexp_substr(self.expression,
10484
+ '[?&]' + (key.expression if isinstance(key, _SQLColumnExpression) else key) + '=([^&]*)'), '[^=]*$'), type=VARCHAR())
10485
+ # If url_part is a column expression, then use case statement to extract the value.
10486
+ if isinstance(url_part, _SQLColumnExpression):
10487
+ whens = [(url_part == 'HOST', None),
10488
+ (url_part == 'PATH', None ),
10489
+ (url_part == 'QUERY', query_expr),
10490
+ (url_part == 'REF', None),
10491
+ (url_part == 'PROTOCOL', None),
10492
+ (url_part == 'FILE',None),
10493
+ (url_part == 'AUTHORITY', None),
10494
+ (url_part == 'USERINFO', None)]
10495
+
10496
+ from teradataml.dataframe.sql_functions import case
10497
+ return case(whens)
10498
+
10499
+ # If url_part is a string, then return the query expression directly.
10500
+ if isinstance(url_part, str) and url_part == 'QUERY':
10501
+ return query_expr
10502
+
10712
10503
  # Regex pattern used to extract 'url_part' is '^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'.
10713
10504
  # teradataml does not support regex grouping hence in some cases first used 'regex_replace' and
10714
10505
  # then 'regex_substr' or vice-versa.
10715
10506
  _part_to_extract_dict = {'HOST': _SQLColumnExpression(
10716
- func.regexp_replace(func.regexp_substr(self.expression, '//([^/?#]*)'), '(//[^/?#]+@)|(//)|(:\d+)', ''),
10717
- type=VARCHAR()),
10507
+ func.regexp_replace(func.regexp_substr(self.expression, '//([^/?#]*)'), '(//[^/?#]+@)|(//)|(:\d+)', ''),
10508
+ type=VARCHAR()),
10718
10509
  'PATH': _SQLColumnExpression(func.regexp_substr(
10719
10510
  func.regexp_replace(self.expression, '^(([^:/?#]+):)?(//([^/?#]*))?', ''),
10720
10511
  '([^?#]*)'), type=VARCHAR()),