teradataml 20.0.0.5__py3-none-any.whl → 20.0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic. Click here for more details.

Files changed (53) hide show
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +96 -0
  3. teradataml/_version.py +1 -1
  4. teradataml/analytics/analytic_function_executor.py +1 -1
  5. teradataml/analytics/utils.py +56 -11
  6. teradataml/clients/auth_client.py +10 -6
  7. teradataml/clients/keycloak_client.py +165 -0
  8. teradataml/common/constants.py +10 -0
  9. teradataml/common/exceptions.py +32 -0
  10. teradataml/common/messagecodes.py +27 -0
  11. teradataml/common/messages.py +9 -1
  12. teradataml/common/sqlbundle.py +3 -2
  13. teradataml/common/utils.py +94 -12
  14. teradataml/context/context.py +37 -9
  15. teradataml/data/jsons/byom/onnxembeddings.json +1 -0
  16. teradataml/data/pattern_matching_data.csv +11 -0
  17. teradataml/data/sdk/modelops/modelops_spec.json +101737 -0
  18. teradataml/data/teradataml_example.json +8 -1
  19. teradataml/data/url_data.csv +10 -9
  20. teradataml/dataframe/copy_to.py +1 -1
  21. teradataml/dataframe/dataframe.py +980 -82
  22. teradataml/dataframe/dataframe_utils.py +58 -25
  23. teradataml/dataframe/functions.py +962 -1
  24. teradataml/dataframe/sql.py +570 -1031
  25. teradataml/hyperparameter_tuner/utils.py +4 -2
  26. teradataml/lib/aed_0_1.dll +0 -0
  27. teradataml/opensource/_base.py +7 -1
  28. teradataml/options/configure.py +20 -4
  29. teradataml/scriptmgmt/UserEnv.py +13 -2
  30. teradataml/scriptmgmt/lls_utils.py +99 -24
  31. teradataml/sdk/README.md +79 -0
  32. teradataml/sdk/__init__.py +4 -0
  33. teradataml/sdk/_auth_modes.py +422 -0
  34. teradataml/sdk/_func_params.py +487 -0
  35. teradataml/sdk/_json_parser.py +453 -0
  36. teradataml/sdk/_openapi_spec_constants.py +249 -0
  37. teradataml/sdk/_utils.py +236 -0
  38. teradataml/sdk/api_client.py +897 -0
  39. teradataml/sdk/constants.py +62 -0
  40. teradataml/sdk/modelops/__init__.py +98 -0
  41. teradataml/sdk/modelops/_client.py +406 -0
  42. teradataml/sdk/modelops/_constants.py +304 -0
  43. teradataml/sdk/modelops/models.py +2308 -0
  44. teradataml/sdk/spinner.py +107 -0
  45. teradataml/table_operators/query_generator.py +4 -21
  46. teradataml/utils/dtypes.py +2 -1
  47. teradataml/utils/utils.py +0 -1
  48. teradataml/utils/validators.py +5 -1
  49. {teradataml-20.0.0.5.dist-info → teradataml-20.0.0.6.dist-info}/METADATA +101 -2
  50. {teradataml-20.0.0.5.dist-info → teradataml-20.0.0.6.dist-info}/RECORD +53 -36
  51. {teradataml-20.0.0.5.dist-info → teradataml-20.0.0.6.dist-info}/WHEEL +0 -0
  52. {teradataml-20.0.0.5.dist-info → teradataml-20.0.0.6.dist-info}/top_level.txt +0 -0
  53. {teradataml-20.0.0.5.dist-info → teradataml-20.0.0.6.dist-info}/zip-safe +0 -0
@@ -6738,8 +6738,42 @@ class _SQLColumnExpression(_LogicalColumnExpression,
6738
6738
  """
6739
6739
  return _SQLColumnExpression(self.expression.distinct().label(self.name))
6740
6740
 
6741
+ def _format_ilike_like_args(self, other, escape_char=None):
6742
+ """
6743
+ DESCRIPTION:
6744
+ Internal function to validate and format the arguments passed to
6745
+ 'ilike' and 'like' functions.
6746
+
6747
+ PARAMETERS:
6748
+ other:
6749
+ Required Argument.
6750
+ Specifies a string to match.
6751
+ Types: str OR ColumnExpression
6752
+
6753
+ escape_char:
6754
+ Optional Argument.
6755
+ Specifies the escape character to be used in the pattern.
6756
+ Types: str with one character
6757
+
6758
+ RETURNS:
6759
+ tuple
6760
+
6761
+ EXAMPLES:
6762
+ self._format_ilike_like_args(other='A!%', escape_char='!')
6763
+ """
6764
+ # Validate the arguments.
6765
+ arg_validate = []
6766
+ arg_validate.append(["other", other, False, (str, ColumnExpression), True])
6767
+ arg_validate.append(["escape_char", escape_char, True, (str), True])
6768
+ _Validators._validate_function_arguments(arg_validate)
6769
+
6770
+ # Format the arguments for ilike/like function.
6771
+ other = "{}".format(other.compile()) if isinstance(other, ColumnExpression) else "'{}'".format(other)
6772
+ escape = " ESCAPE '{}'".format(escape_char) if escape_char is not None else ""
6773
+ return other, escape
6774
+
6741
6775
  @collect_queryband(queryband="DFC_ilike")
6742
- def ilike(self, other):
6776
+ def ilike(self, other, escape_char=None):
6743
6777
  """
6744
6778
  DESCRIPTION:
6745
6779
  Function which is used to match the pattern.
@@ -6748,67 +6782,95 @@ class _SQLColumnExpression(_LogicalColumnExpression,
6748
6782
  other:
6749
6783
  Required Argument.
6750
6784
  Specifies a string to match. String match is case insensitive.
6751
- Types: str
6785
+ Types: str OR ColumnExpression
6786
+
6787
+ escape_char:
6788
+ Optional Argument.
6789
+ Specifies the escape character to be used in the pattern.
6790
+ Types: str with one character
6752
6791
 
6753
6792
  RETURNS:
6754
6793
  ColumnExpression.
6755
6794
 
6756
6795
  EXAMPLES:
6757
- >>> load_example_data("dataframe","admissions_train")
6758
- >>> df = DataFrame.from_table('admissions_train')
6759
- masters gpa stats programming admitted
6760
- id
6761
- 13 no 4.00 Advanced Novice 1
6762
- 26 yes 3.57 Advanced Advanced 1
6763
- 5 no 3.44 Novice Novice 0
6764
- 19 yes 1.98 Advanced Advanced 0
6765
- 15 yes 4.00 Advanced Advanced 1
6766
- 40 yes 3.95 Novice Beginner 0
6767
- 7 yes 2.33 Novice Novice 1
6768
- 22 yes 3.46 Novice Beginner 0
6769
- 36 no 3.00 Advanced Novice 0
6770
- 38 yes 2.65 Advanced Beginner 1
6771
-
6772
- # Example 1: Find out the records whose stats starts with 'A'.
6773
- >>> df = df[df.stats.ilike('a%')]
6796
+ # Load example data.
6797
+ >>> load_example_data("teradataml", "pattern_matching_data")
6798
+ >>> df = DataFrame('pattern_matching_data')
6799
+ data pattern level
6800
+ id
6801
+ 5 prod_01 prod_01% Beginner
6802
+ 8 log%2024 l_g% Beginner
6803
+ 2 user%2025 user!%% Beginner
6804
+ 6 prod%v2 prod!_% Novice
6805
+ 4 data%backup data@%% Advanced
6806
+ 10 backup_9 restore!_9 Beginner
6807
+ 7 log_file log^_file Advanced
6808
+ 1 user_Alpha user!_% Advanced
6809
+ 3 data_2024 d% Novice
6810
+ 9 temp_file temp!__% Novice
6811
+
6812
+ # Example 1: Find out the records which start with 'A' in the column 'level'.
6813
+ >>> df = df[df.level.ilike('A%')]
6774
6814
  >>> df
6775
- masters gpa stats programming admitted
6776
- id
6777
- 19 yes 1.98 Advanced Advanced 0
6778
- 15 yes 4.00 Advanced Advanced 1
6779
- 38 yes 2.65 Advanced Beginner 1
6780
- 26 yes 3.57 Advanced Advanced 1
6781
- 17 no 3.83 Advanced Advanced 1
6782
- 34 yes 3.85 Advanced Beginner 0
6783
- 13 no 4.00 Advanced Novice 1
6784
- 24 no 1.87 Advanced Novice 1
6785
- 36 no 3.00 Advanced Novice 0
6786
- 27 yes 3.96 Advanced Advanced 0
6815
+ data pattern level
6816
+ id
6817
+ 4 data%backup data@%% Advanced
6818
+ 7 log_file log^_file Advanced
6819
+ 1 user_Alpha user!_% Advanced
6787
6820
  >>>
6788
6821
 
6789
6822
  # Example 2: Create a new Column with values as -
6790
- # 1 if value of column 'stats' starts with 'a' and third letter is 'v',
6823
+ # 1 if value of column 'level' starts with 'n' and third letter is 'v',
6791
6824
  # 0 otherwise. Ignore case.
6792
- >>> df.assign(new_col = case_when((df.stats.ilike('a_v%').expression, 1), else_=0))
6793
- masters gpa stats programming admitted n
6794
- id
6795
- 13 no 4.00 Advanced Novice 1 1
6796
- 26 yes 3.57 Advanced Advanced 1 1
6797
- 5 no 3.44 Novice Novice 0 0
6798
- 19 yes 1.98 Advanced Advanced 0 1
6799
- 15 yes 4.00 Advanced Advanced 1 1
6800
- 40 yes 3.95 Novice Beginner 0 0
6801
- 7 yes 2.33 Novice Novice 1 0
6802
- 22 yes 3.46 Novice Beginner 0 0
6803
- 36 no 3.00 Advanced Novice 0 1
6804
- 38 yes 2.65 Advanced Beginner 1 1
6825
+ >>> from sqlalchemy.sql.expression import case as case_when
6826
+ >>> df.assign(new_col = case_when((df.level.ilike('n_v%').expression, 1), else_=0))
6827
+ data pattern level new_col
6828
+ id
6829
+ 3 data_2024 d% Novice 1
6830
+ 1 user_Alpha user!_% Advanced 0
6831
+ 8 log%2024 l_g% Beginner 0
6832
+ 2 user%2025 user!%% Beginner 0
6833
+ 10 backup_9 restore!_9 Beginner 0
6834
+ 9 temp_file temp!__% Novice 1
6835
+ 6 prod%v2 prod!_% Novice 1
6836
+ 5 prod_01 prod_01% Beginner 0
6837
+ 4 data%backup data@%% Advanced 0
6838
+ 7 log_file log^_file Advanced 0
6839
+ >>>
6840
+
6841
+ # Example 3: Find out the records where the value in the 'data' column
6842
+ # matches the pattern specified in the 'pattern' column.
6843
+ >>> df = df[df.data.ilike(df.pattern)]
6844
+ >>> df
6845
+ data pattern level
6846
+ id
6847
+ 3 data_2024 d% Novice
6848
+ 8 log%2024 l_g% Beginner
6849
+ 5 prod_01 prod_01% Beginner
6850
+ >>>
6851
+
6852
+ # Example 4: Find out the records where the value in the 'data' column
6853
+ # matches the pattern specified in the 'pattern' column considering the
6854
+ # escape character as '!'.
6855
+ >>> df = df[df.data.ilike(df.pattern, escape_char='!')]
6856
+ >>> df
6857
+ data pattern level
6858
+ id
6859
+ 8 log%2024 l_g% Beginner
6860
+ 9 temp_file temp!__% Novice
6861
+ 3 data_2024 d% Novice
6862
+ 2 user%2025 user!%% Beginner
6863
+ 1 user_Alpha user!_% Advanced
6864
+ 5 prod_01 prod_01% Beginner
6805
6865
  >>>
6806
6866
  """
6867
+ # Validate and format arguments
6868
+ other, escape = self._format_ilike_like_args(other, escape_char)
6807
6869
  return _SQLColumnExpression(
6808
- literal_column("{} (NOT CASESPECIFIC) LIKE '{}'".format(self.compile(), other)))
6870
+ literal_column("{} (NOT CASESPECIFIC) LIKE {}{}".format(self.compile(), other, escape)))
6809
6871
 
6810
6872
  @collect_queryband(queryband="DFC_like")
6811
- def like(self, other):
6873
+ def like(self, other, escape_char=None):
6812
6874
  """
6813
6875
  DESCRIPTION:
6814
6876
  Function which is used to match the pattern.
@@ -6816,66 +6878,190 @@ class _SQLColumnExpression(_LogicalColumnExpression,
6816
6878
  PARAMETERS:
6817
6879
  other:
6818
6880
  Required Argument.
6819
- Specifies a string to match. String match is case insensitive.
6820
- Types: str
6881
+ Specifies a string to match. String match is case sensitive.
6882
+ Types: str OR ColumnExpression
6883
+
6884
+ escape_char:
6885
+ Optional Argument.
6886
+ Specifies the escape character to be used in the pattern.
6887
+ Types: str with one character
6821
6888
 
6822
6889
  RETURNS:
6823
6890
  ColumnExpression.
6824
6891
 
6825
6892
  EXAMPLES:
6826
- >>> load_example_data("dataframe","admissions_train")
6827
- >>> df = DataFrame.from_table('admissions_train')
6828
- masters gpa stats programming admitted
6829
- id
6830
- 13 no 4.00 Advanced Novice 1
6831
- 26 yes 3.57 Advanced Advanced 1
6832
- 5 no 3.44 Novice Novice 0
6833
- 19 yes 1.98 Advanced Advanced 0
6834
- 15 yes 4.00 Advanced Advanced 1
6835
- 40 yes 3.95 Novice Beginner 0
6836
- 7 yes 2.33 Novice Novice 1
6837
- 22 yes 3.46 Novice Beginner 0
6838
- 36 no 3.00 Advanced Novice 0
6839
- 38 yes 2.65 Advanced Beginner 1
6840
-
6841
- # Example 1: Find out the records whose stats starts with 'A'.
6842
- >>> df = df[df.stats.like('A%')]
6893
+ # Load example data.
6894
+ >>> load_example_data("teradataml", "pattern_matching_data")
6895
+ >>> df = DataFrame('pattern_matching_data')
6896
+ data pattern level
6897
+ id
6898
+ 5 prod_01 prod_01% Beginner
6899
+ 8 log%2024 l_g% Beginner
6900
+ 2 user%2025 user!%% Beginner
6901
+ 6 prod%v2 prod!_% Novice
6902
+ 4 data%backup data@%% Advanced
6903
+ 10 backup_9 restore!_9 Beginner
6904
+ 7 log_file log^_file Advanced
6905
+ 1 user_Alpha user!_% Advanced
6906
+ 3 data_2024 d% Novice
6907
+ 9 temp_file temp!__% Novice
6908
+
6909
+ # Example 1: Find out the records which start with 'A' in the column 'level'.
6910
+ >>> df = df[df.level.like('A%')]
6843
6911
  >>> df
6844
- masters gpa stats programming admitted
6845
- id
6846
- 19 yes 1.98 Advanced Advanced 0
6847
- 15 yes 4.00 Advanced Advanced 1
6848
- 38 yes 2.65 Advanced Beginner 1
6849
- 26 yes 3.57 Advanced Advanced 1
6850
- 17 no 3.83 Advanced Advanced 1
6851
- 34 yes 3.85 Advanced Beginner 0
6852
- 13 no 4.00 Advanced Novice 1
6853
- 24 no 1.87 Advanced Novice 1
6854
- 36 no 3.00 Advanced Novice 0
6855
- 27 yes 3.96 Advanced Advanced 0
6912
+ data pattern level
6913
+ id
6914
+ 4 data%backup data@%% Advanced
6915
+ 7 log_file log^_file Advanced
6916
+ 1 user_Alpha user!_% Advanced
6856
6917
  >>>
6857
6918
 
6858
6919
  # Example 2: Create a new Column with values as -
6859
- # 1 if value of column 'stats' starts with 'A' and third letter is 'v',
6920
+ # 1 if value of column 'level' starts with 'N' and third letter is 'v',
6860
6921
  # 0 otherwise. Do not ignore case.
6861
- >>> df.assign(new_col = case_when((df.stats.like('A_v%').expression, 1), else_=0))
6922
+ >>> from sqlalchemy.sql.expression import case as case_when
6923
+ >>> df.assign(new_col = case_when((df.level.like('N_v%').expression, 1), else_=0))
6924
+ data pattern level new_col
6925
+ id
6926
+ 3 data_2024 d% Novice 1
6927
+ 1 user_Alpha user!_% Advanced 0
6928
+ 8 log%2024 l_g% Beginner 0
6929
+ 2 user%2025 user!%% Beginner 0
6930
+ 10 backup_9 restore!_9 Beginner 0
6931
+ 9 temp_file temp!__% Novice 1
6932
+ 6 prod%v2 prod!_% Novice 1
6933
+ 5 prod_01 prod_01% Beginner 0
6934
+ 4 data%backup data@%% Advanced 0
6935
+ 7 log_file log^_file Advanced 0
6936
+ >>>
6937
+
6938
+ # Example 3: Find out the records where the value in the 'data' column
6939
+ # matches the pattern specified in the 'pattern' column.
6940
+ >>> df = df[df.data.like(df.pattern)]
6862
6941
  >>> df
6863
- masters gpa stats programming admitted n
6864
- id
6865
- 13 no 4.00 Advanced Novice 1 1
6866
- 26 yes 3.57 Advanced Advanced 1 1
6867
- 5 no 3.44 Novice Novice 0 0
6868
- 19 yes 1.98 Advanced Advanced 0 1
6869
- 15 yes 4.00 Advanced Advanced 1 1
6870
- 40 yes 3.95 Novice Beginner 0 0
6871
- 7 yes 2.33 Novice Novice 1 0
6872
- 22 yes 3.46 Novice Beginner 0 0
6873
- 36 no 3.00 Advanced Novice 0 1
6874
- 38 yes 2.65 Advanced Beginner 1 1
6942
+ data pattern level
6943
+ id
6944
+ 3 data_2024 d% Novice
6945
+ 8 log%2024 l_g% Beginner
6946
+ 5 prod_01 prod_01% Beginner
6875
6947
  >>>
6948
+
6949
+ # Example 4: Find out the records where the value in the 'data' column
6950
+ # matches the pattern specified in the 'pattern' column considering the
6951
+ # escape character as '!'.
6952
+ >>> df = df[df.data.like(df.pattern, escape_char='!')]
6953
+ >>> df
6954
+ data pattern level
6955
+ id
6956
+ 8 log%2024 l_g% Beginner
6957
+ 9 temp_file temp!__% Novice
6958
+ 3 data_2024 d% Novice
6959
+ 2 user%2025 user!%% Beginner
6960
+ 1 user_Alpha user!_% Advanced
6961
+ 5 prod_01 prod_01% Beginner
6962
+ >>>
6963
+ """
6964
+ # Validate and format arguments
6965
+ other, escape = self._format_ilike_like_args(other, escape_char)
6966
+ return _SQLColumnExpression(
6967
+ literal_column("{} (CASESPECIFIC) LIKE {}{}".format(self.compile(), other, escape)))
6968
+
6969
+ def rlike(self, pattern, case_sensitive=True):
6970
+ """
6971
+ DESCRIPTION:
6972
+ Function to match a string against a regular expression pattern.
6973
+
6974
+ PARAMETERS:
6975
+ pattern:
6976
+ Required Argument.
6977
+ Specifies a regular expression pattern to match against the column values.
6978
+ Note:
6979
+ The pattern follows POSIX regular expression syntax.
6980
+ Type: str OR ColumnExpression
6981
+
6982
+ case_sensitive:
6983
+ Optional Argument.
6984
+ Specifies whether the pattern matching is case-sensitive.
6985
+ When set to False, the function ignores case sensitivity and matches
6986
+ the regex. Otherwise, the function considers case sensitivity while matching the regex.
6987
+ Default: True
6988
+ Type: bool
6989
+
6990
+ RAISES:
6991
+ TeradataMlException
6992
+
6993
+ RETURNS:
6994
+ ColumnExpression
6995
+
6996
+ EXAMPLES:
6997
+ >>> load_example_data("dataframe","admissions_train")
6998
+ >>> df = DataFrame("admissions_train")
6999
+ >>> df
7000
+ masters gpa stats programming admitted
7001
+ id
7002
+ 13 no 4.00 Advanced Novice 1
7003
+ 26 yes 3.57 Advanced Advanced 1
7004
+ 5 no 3.44 Novice Novice 0
7005
+ 19 yes 1.98 Advanced Advanced 0
7006
+ 15 yes 4.00 Advanced Advanced 1
7007
+ 40 yes 3.95 Novice Beginner 0
7008
+ 7 yes 2.33 Novice Novice 1
7009
+ 22 yes 3.46 Novice Beginner 0
7010
+ 36 no 3.00 Advanced Novice 0
7011
+ 38 yes 2.65 Advanced Beginner 1
7012
+
7013
+ # Example 1: Find records whose 'stats' column contains 'van'.
7014
+ >>> result = df[df.stats.rlike('.*van.*')]
7015
+ >>> result
7016
+ masters gpa stats programming admitted
7017
+ id
7018
+ 13 no 4.00 Advanced Novice 1
7019
+ 26 yes 3.57 Advanced Advanced 1
7020
+ 34 yes 3.85 Advanced Beginner 0
7021
+ 19 yes 1.98 Advanced Advanced 0
7022
+ 15 yes 4.00 Advanced Advanced 1
7023
+ 36 no 3.00 Advanced Novice 0
7024
+ 38 yes 2.65 Advanced Beginner 1
7025
+
7026
+ # Example 2: Find records whose 'stats' column ends with 'ced'.
7027
+ >>> result = df[df.stats.rlike('.*ced$')]
7028
+ >>> result
7029
+ masters gpa stats programming admitted
7030
+ id
7031
+ 34 yes 3.85 Advanced Beginner 0
7032
+ 32 yes 3.46 Advanced Beginner 0
7033
+ 11 no 3.13 Advanced Advanced 1
7034
+ 30 yes 3.79 Advanced Novice 0
7035
+ 28 no 3.93 Advanced Advanced 1
7036
+ 16 no 3.70 Advanced Advanced 1
7037
+ 14 yes 3.45 Advanced Advanced 0
7038
+
7039
+ # Example 3: Case-insensitive search for records containing 'NOVICE'.
7040
+ >>> result = df[df.stats.rlike('NOVICE', case_sensitive=False)]
7041
+ >>> result
7042
+ masters gpa stats programming admitted
7043
+ id
7044
+ 12 no 3.65 Novice Novice 1
7045
+ 40 yes 3.95 Novice Beginner 0
7046
+ 7 yes 2.33 Novice Novice 1
7047
+ 5 no 3.44 Novice Novice 0
7048
+ 22 yes 3.46 Novice Beginner 0
7049
+ 37 no 3.52 Novice Novice 1
6876
7050
  """
7051
+ # Validate arguments
7052
+ arg_validate = []
7053
+ arg_validate.append(["pattern", pattern, False, (str, ColumnExpression), True])
7054
+ arg_validate.append(["case_sensitive", case_sensitive, True, (bool), True])
7055
+ _Validators._validate_function_arguments(arg_validate)
7056
+
7057
+ if isinstance(pattern, ColumnExpression):
7058
+ pattern = pattern.expression
7059
+
7060
+ # Set the case sensitivity modifier based on the parameter.
7061
+ case_modifier = 'c' if case_sensitive else 'i'
6877
7062
  return _SQLColumnExpression(
6878
- literal_column("{} (CASESPECIFIC) LIKE '{}'".format(self.compile(), other)))
7063
+ func.regexp_similar(self.expression, pattern, case_modifier) == 1,
7064
+ type=INTEGER())
6879
7065
 
6880
7066
  @collect_queryband(queryband="DFC_startswith")
6881
7067
  def startswith(self, other):
@@ -7057,6 +7243,202 @@ class _SQLColumnExpression(_LogicalColumnExpression,
7057
7243
  return _SQLColumnExpression(func.substr(self.expression, start_pos_expr, length_expr),
7058
7244
  type=self.type)
7059
7245
 
7246
+ def count_delimiters(self, delimiter):
7247
+ """
7248
+ DESCRIPTION:
7249
+ Function to count the total number of occurrences of a specified delimiter.
7250
+
7251
+ PARAMETERS:
7252
+ delimiter:
7253
+ Required Argument.
7254
+ Specifies the delimiter to count in the column values.
7255
+ Types: str
7256
+
7257
+ RETURNS:
7258
+ ColumnExpression.
7259
+
7260
+ EXAMPLES:
7261
+ # Load sample data
7262
+ >>> load_example_data("dataframe", "admissions_train")
7263
+ >>> df = DataFrame("admissions_train")
7264
+
7265
+ # Create a DataFrame with a column containing delimiters.
7266
+ >>> df1 = df.assign(delim_col = 'ab.c.def.g')
7267
+ >>> df1
7268
+ masters gpa stats programming admitted delim_col
7269
+ id
7270
+ 38 yes 2.65 Advanced Beginner 1 ab.c.def.g
7271
+ 7 yes 2.33 Novice Novice 1 ab.c.def.g
7272
+ 26 yes 3.57 Advanced Advanced 1 ab.c.def.g
7273
+
7274
+ # Example 1: Count the number of periods in column 'delim_col'.
7275
+ >>> res = df1.assign(dot_count = df1.delim_col.count_delimiters('.'))
7276
+ >>> res
7277
+ masters gpa stats programming admitted delim_col dot_count
7278
+ id
7279
+ 38 yes 2.65 Advanced Beginner 1 ab.c.def.g 3
7280
+ 7 yes 2.33 Novice Novice 1 ab.c.def.g 3
7281
+ 26 yes 3.57 Advanced Advanced 1 ab.c.def.g 3
7282
+
7283
+ # Example 2: Count multiple delimiters in a string.
7284
+ >>> df2 = df.assign(delim_col = 'a,b;c;d-e')
7285
+ >>> res = df2.assign(
7286
+ ... comma_count = df2.delim_col.count_delimiters(','),
7287
+ ... semicolon_count = df2.delim_col.count_delimiters(';'),
7288
+ ... colon_count = df2.delim_col.count_delimiters(':'),
7289
+ ... dash_count = df2.delim_col.count_delimiters('-')
7290
+ ... )
7291
+ >>> res
7292
+ masters gpa stats programming admitted delim_col colon_count comma_count dash_count semicolon_count
7293
+ id
7294
+ 38 yes 2.65 Advanced Beginner 1 a,b;c;d-e 0 1 1 2
7295
+ 7 yes 2.33 Novice Novice 1 a,b;c;d-e 0 1 1 2
7296
+ 26 yes 3.57 Advanced Advanced 1 a,b;c;d-e 0 1 1 2
7297
+ 5 no 3.44 Novice Novice 0 a,b;c;d-e 0 1 1 2
7298
+ """
7299
+
7300
+ # Validate arguments
7301
+ arg_validate = []
7302
+ arg_validate.append(["delimiter", delimiter, False, (str), True])
7303
+ _Validators._validate_function_arguments(arg_validate)
7304
+
7305
+ # Calculate the count by comparing the original string length
7306
+ # with the length after removing all delimiters.
7307
+ expression = (func.characters(self.expression) - func.characters(
7308
+ func.oreplace(self.expression, delimiter, '')))// func.characters(delimiter)
7309
+
7310
+ return _SQLColumnExpression(expression, type=INTEGER())
7311
+
7312
+ @collect_queryband(queryband="DFC_substringIndex")
7313
+ def substring_index(self, delimiter, count):
7314
+ """
7315
+ DESCRIPTION:
7316
+ Function to return the substring from a column before a specified
7317
+ delimiter, up to a given occurrence count.
7318
+
7319
+ PARAMETERS:
7320
+ delimiter:
7321
+ Required Argument.
7322
+ Specifies the delimiter string to split the column values.
7323
+ Types: str
7324
+
7325
+ count:
7326
+ Required Argument.
7327
+ Specifies the number of occurrences of the delimiter to consider.
7328
+ If positive, the substring is extracted from the start of the string.
7329
+ If negative, the substring is extracted from the end of the string.
7330
+ If zero, an empty string is returned.
7331
+ Types: int
7332
+
7333
+ RAISES:
7334
+ TeradataMlException
7335
+
7336
+ RETURNS:
7337
+ ColumnExpression.
7338
+
7339
+ EXAMPLES:
7340
+ # Load the data to run the example.
7341
+ >>> load_example_data("dataframe","admissions_train")
7342
+ >>> df = DataFrame('admissions_train')
7343
+
7344
+ # Create a new column 'delim_col' with string.
7345
+ >>> df1 = df.assign(delim_col = 'ab.c.def.g')
7346
+ >>> df1
7347
+ masters gpa stats programming admitted delim_col
7348
+ id
7349
+ 38 yes 2.65 Advanced Beginner 1 ab.c.def.g
7350
+ 7 yes 2.33 Novice Novice 1 ab.c.def.g
7351
+ 26 yes 3.57 Advanced Advanced 1 ab.c.def.g
7352
+ 5 no 3.44 Novice Novice 0 ab.c.def.g
7353
+ 3 no 3.70 Novice Beginner 1 ab.c.def.g
7354
+ 22 yes 3.46 Novice Beginner 0 ab.c.def.g
7355
+ 1 yes 3.95 Beginner Beginner 0 ab.c.def.g
7356
+ 17 no 3.83 Advanced Advanced 1 ab.c.def.g
7357
+ 15 yes 4.00 Advanced Advanced 1 ab.c.def.g
7358
+ 34 yes 3.85 Advanced Beginner 0 ab.c.def.g
7359
+
7360
+ # Example 1: Create a new column 'new_column' by extracting the substring
7361
+ # based on positive count.
7362
+ >>> res = df1.assign(new_column = df1.delim_col.substring_index('.', 2))
7363
+ >>> res
7364
+ masters gpa stats programming admitted delim_col new_column
7365
+ id
7366
+ 34 yes 3.85 Advanced Beginner 0 ab.c.def.g ab.c
7367
+ 32 yes 3.46 Advanced Beginner 0 ab.c.def.g ab.c
7368
+ 11 no 3.13 Advanced Advanced 1 ab.c.def.g ab.c
7369
+ 30 yes 3.79 Advanced Novice 0 ab.c.def.g ab.c
7370
+ 28 no 3.93 Advanced Advanced 1 ab.c.def.g ab.c
7371
+ 16 no 3.70 Advanced Advanced 1 ab.c.def.g ab.c
7372
+ 35 no 3.68 Novice Beginner 1 ab.c.def.g ab.c
7373
+ 40 yes 3.95 Novice Beginner 0 ab.c.def.g ab.c
7374
+ 19 yes 1.98 Advanced Advanced 0 ab.c.def.g ab.c
7375
+
7376
+ # Example 2: Create a new column 'new_column' by extracting the substring
7377
+ # based on negative count.
7378
+ >>> res = df1.assign(new_column = df1.delim_col.substring_index('.', -3))
7379
+ >>> res
7380
+ masters gpa stats programming admitted delim_col new_column
7381
+ id
7382
+ 34 yes 3.85 Advanced Beginner 0 ab.c.def.g c.def.g
7383
+ 32 yes 3.46 Advanced Beginner 0 ab.c.def.g c.def.g
7384
+ 11 no 3.13 Advanced Advanced 1 ab.c.def.g c.def.g
7385
+ 30 yes 3.79 Advanced Novice 0 ab.c.def.g c.def.g
7386
+ 28 no 3.93 Advanced Advanced 1 ab.c.def.g c.def.g
7387
+ 16 no 3.70 Advanced Advanced 1 ab.c.def.g c.def.g
7388
+ 35 no 3.68 Novice Beginner 1 ab.c.def.g c.def.g
7389
+ 40 yes 3.95 Novice Beginner 0 ab.c.def.g c.def.g
7390
+ 19 yes 1.98 Advanced Advanced 0 ab.c.def.g c.def.g
7391
+
7392
+ # Example 3: Create a new column 'new_column' by extracting the substring
7393
+ # with 3-character delimiter based on positive count.
7394
+ >>> res = df1.assign(new_column = df1.delim_col.substring_index('c.d', 1))
7395
+ >>> res
7396
+ masters gpa stats programming admitted delim_col new_column
7397
+ id
7398
+ 34 yes 3.85 Advanced Beginner 0 ab.c.def.g ab.
7399
+ 32 yes 3.46 Advanced Beginner 0 ab.c.def.g ab.
7400
+ 11 no 3.13 Advanced Advanced 1 ab.c.def.g ab.
7401
+ 30 yes 3.79 Advanced Novice 0 ab.c.def.g ab.
7402
+ 28 no 3.93 Advanced Advanced 1 ab.c.def.g ab.
7403
+ 16 no 3.70 Advanced Advanced 1 ab.c.def.g ab.
7404
+ 35 no 3.68 Novice Beginner 1 ab.c.def.g ab.
7405
+ 40 yes 3.95 Novice Beginner 0 ab.c.def.g ab.
7406
+
7407
+ """
7408
+ # Validate arguments
7409
+ arg_validate = []
7410
+ arg_validate.append(["delimiter", delimiter, False, (str), True])
7411
+ arg_validate.append(["count", count, False, (int), True])
7412
+ _Validators._validate_function_arguments(arg_validate)
7413
+
7414
+ # Create the SQL expression for substring_index.
7415
+ if count == 0:
7416
+ return _SQLColumnExpression(literal(""), type=self.type)
7417
+
7418
+ elif count > 0:
7419
+ # For positive count, return substring before the nth occurrence.
7420
+ position = func.instr(self.expression, delimiter, 1, count)
7421
+ # Handle the case where the delimiter is not found.
7422
+ expression = case_when((position == 0, self.expression),
7423
+ else_=func.substring(self.expression, 1, position - 1))
7424
+ else:
7425
+ # For negative count, we need to find substring after the (total - |count| + 1)th delimiter
7426
+ # First, get the total number of delimiters
7427
+ total_delimiters = self.count_delimiters(delimiter).expression
7428
+
7429
+ # Calculate the position to start from (convert negative count to positive position).
7430
+ position = total_delimiters + count + 1
7431
+
7432
+ # Handle the case where the absolute negative count exceeds the total number of delimiters.
7433
+ expression = case_when((position > 0,
7434
+ # Get substring after the nth occurrence from the beginning.
7435
+ func.substring(self.expression,
7436
+ func.instr(self.expression, delimiter, 1, position) + len(delimiter),
7437
+ func.characters(self.expression))),
7438
+ else_=self.expression)
7439
+
7440
+ return _SQLColumnExpression(expression, type=self.type)
7441
+
7060
7442
  @collect_queryband(queryband="DFC_replace")
7061
7443
  def replace(self, to_replace, value=None):
7062
7444
  """
@@ -8969,566 +9351,11 @@ class _SQLColumnExpression(_LogicalColumnExpression,
8969
9351
  Optional Argument.
8970
9352
  Specifies the format for formatting the values of the column.
8971
9353
  Type: str OR ColumnExpression
8972
- Note:
9354
+ Notes:
8973
9355
  * If 'formatter' is omitted, numeric values are converted to a string exactly
8974
9356
  long enough to hold their significant digits.
9357
+ * Get the supported formatters using the `get_formatters("CHAR")` function.
8975
9358
 
8976
- * Formatter for Numeric types:
8977
- +--------------------------------------------------------------------------------------------------+
8978
- | FORMATTER DESCRIPTION |
8979
- +--------------------------------------------------------------------------------------------------+
8980
- | , (comma) A comma in the specified position. |
8981
- | A comma cannot begin a number format. |
8982
- | A comma cannot appear to the right of a decimal |
8983
- | character or period in a number format. |
8984
- | Example: |
8985
- | +-------------------------------------------------+ |
8986
- | | data formatter result | |
8987
- | +-------------------------------------------------+ |
8988
- | | 1234 9,999 1,234 | |
8989
- | +-------------------------------------------------+ |
8990
- +--------------------------------------------------------------------------------------------------+
8991
- | . (period) A decimal point. |
8992
- | User can only specify one period in a number format. |
8993
- | Example: |
8994
- | +-------------------------------------------------+ |
8995
- | | data formatter result | |
8996
- | +-------------------------------------------------+ |
8997
- | | 123.46 9999.9 123.5 | |
8998
- | +-------------------------------------------------+ |
8999
- +--------------------------------------------------------------------------------------------------+
9000
- | $ A value with a leading dollar sign. |
9001
- | Example: |
9002
- | +-------------------------------------------------+ |
9003
- | | data formatter result | |
9004
- | +-------------------------------------------------+ |
9005
- | | 1234 $9999 $1234 | |
9006
- | +-------------------------------------------------+ |
9007
- +--------------------------------------------------------------------------------------------------+
9008
- | 0 Leading zeros. |
9009
- | Trailing zeros. |
9010
- | Example: |
9011
- | +-------------------------------------------------+ |
9012
- | | data formatter result | |
9013
- | +-------------------------------------------------+ |
9014
- | | 1234 09999 01234 | |
9015
- | +-------------------------------------------------+ |
9016
- +--------------------------------------------------------------------------------------------------+
9017
- | 9 A value with the specified number of digits with a |
9018
- | leading space if positive or with a leading minus |
9019
- | if negative. |
9020
- | Example: |
9021
- | +-------------------------------------------------+ |
9022
- | | data formatter result | |
9023
- | +-------------------------------------------------+ |
9024
- | | 1234 9999 1234 | |
9025
- | | 1234 999 #### | |
9026
- | +-------------------------------------------------+ |
9027
- +--------------------------------------------------------------------------------------------------+
9028
- | B Blank space for the integer part of a fixed point number|
9029
- | when the integer part is zero. |
9030
- | Example: |
9031
- | +-------------------------------------------------+ |
9032
- | | data formatter result | |
9033
- | +-------------------------------------------------+ |
9034
- | | 0.1234 B.999 Blank space| |
9035
- | +-------------------------------------------------+ |
9036
- +--------------------------------------------------------------------------------------------------+
9037
- | C The ISO currency symbol as specified in the ISOCurrency |
9038
- | element in the SDF file. |
9039
- | Example: |
9040
- | +-------------------------------------------------+ |
9041
- | | data formatter result | |
9042
- | +-------------------------------------------------+ |
9043
- | | 234 C999 USD234 | |
9044
- | +-------------------------------------------------+ |
9045
- +--------------------------------------------------------------------------------------------------+
9046
- | D The character that separates the integer and fractional |
9047
- | part of non-monetary values. |
9048
- | Example: |
9049
- | +-------------------------------------------------+ |
9050
- | | data formatter result | |
9051
- | +-------------------------------------------------+ |
9052
- | | 234.56 999D9 234.6 | |
9053
- | +-------------------------------------------------+ |
9054
- +--------------------------------------------------------------------------------------------------+
9055
- | EEEE A value in scientific notation. |
9056
- | Example: |
9057
- | +-------------------------------------------------+ |
9058
- | | data formatter result | |
9059
- | +-------------------------------------------------+ |
9060
- | | 234.56 9.9EEEE 2.3E+02 | |
9061
- | +-------------------------------------------------+ |
9062
- +--------------------------------------------------------------------------------------------------+
9063
- | G The character that separates groups of digits in the |
9064
- | integer part of non-monetary values. |
9065
- | +-------------------------------------------------+ |
9066
- | | data formatter result | |
9067
- | +-------------------------------------------------+ |
9068
- | | 123456 9G99G99 1,234,56 | |
9069
- | +-------------------------------------------------+ |
9070
- +--------------------------------------------------------------------------------------------------+
9071
- | L The string representing the local currency as specified |
9072
- | in the Currency element according to system settings. |
9073
- | Example: |
9074
- | +-------------------------------------------------+ |
9075
- | | data formatter result | |
9076
- | +-------------------------------------------------+ |
9077
- | | 234 L999 $234 | |
9078
- | +-------------------------------------------------+ |
9079
- +--------------------------------------------------------------------------------------------------+
9080
- | MI A trailing minus sign if the value is negative. |
9081
- | The MI format element can appear only in the last |
9082
- | position of a number format. |
9083
- | Example: |
9084
- | +-------------------------------------------------+ |
9085
- | | data formatter result | |
9086
- | +-------------------------------------------------+ |
9087
- | | -1234 9999MI 1234- | |
9088
- | +-------------------------------------------------+ |
9089
- +--------------------------------------------------------------------------------------------------+
9090
- | PR A negative value in <angle brackets>, or |
9091
- | a positive value with a leading and trailing blank. |
9092
- | The PR format element can appear only in the last |
9093
- | position of a number format. |
9094
- | Example: |
9095
- | +-------------------------------------------------+ |
9096
- | | data formatter result | |
9097
- | +-------------------------------------------------+ |
9098
- | | -1234 9999PR <1234> | |
9099
- | +-------------------------------------------------+ |
9100
- +--------------------------------------------------------------------------------------------------+
9101
- | S A negative value with a leading or trailing minus sign. |
9102
- | a positive value with a leading or trailing plus sign. |
9103
- | The S format element can appear only in the first or |
9104
- | last position of a number format. |
9105
- | Example: |
9106
- | +-------------------------------------------------+ |
9107
- | | data formatter result | |
9108
- | +-------------------------------------------------+ |
9109
- | | +1234 S9999 +1234 | |
9110
- | +-------------------------------------------------+ |
9111
- +--------------------------------------------------------------------------------------------------+
9112
- | TM (text minimum format) Returns the smallest number of |
9113
- | characters possible. This element is case insensitive. |
9114
- | TM or TM9 return the number in fixed notation unless |
9115
- | the output exceeds 64 characters. If the output exceeds |
9116
- | 64 characters, the number is returned in scientific |
9117
- | notation. |
9118
- | TME returns the number in scientific notation with the |
9119
- | smallest number of characters. |
9120
- | You cannot precede this element with an other element. |
9121
- | You can follow this element only with one 9 or one E |
9122
- | (or e), but not with any combination of these. |
9123
- | Example: |
9124
- | +-------------------------------------------------+ |
9125
- | | data formatter result | |
9126
- | +-------------------------------------------------+ |
9127
- | | 1234 TM 1234 | |
9128
- | +-------------------------------------------------+ |
9129
- +--------------------------------------------------------------------------------------------------+
9130
- | U (dual currency) The string that represents the dual |
9131
- | currency as specified in the DualCurrency element |
9132
- | according to system settings. |
9133
- | Example: |
9134
- | +-------------------------------------------------+ |
9135
- | | data formatter result | |
9136
- | +-------------------------------------------------+ |
9137
- | | 1234 U9999 $1234 | |
9138
- | +-------------------------------------------------+ |
9139
- +--------------------------------------------------------------------------------------------------+
9140
- | V A value multiplied by 10 to the n (and, if necessary, |
9141
- | rounded up), where n is the number of 9's after the V. |
9142
- | Example: |
9143
- | +-------------------------------------------------+ |
9144
- | | data formatter result | |
9145
- | +-------------------------------------------------+ |
9146
- | | 1234 9999V99 123400 | |
9147
- | +-------------------------------------------------+ |
9148
- +--------------------------------------------------------------------------------------------------+
9149
- | X The hexadecimal value of the specified number of digits.|
9150
- | If the specified number is not an integer, the function |
9151
- | will round it to an integer. |
9152
- | This element accepts only positive values or zero. |
9153
- | Negative values return an error. You can precede this |
9154
- | element only with zero (which returns leading zeros) or |
9155
- | FM. Any other elements return an error. If you do not |
9156
- | specify zero or FM, the return always has one leading |
9157
- | blank. |
9158
- | Example: |
9159
- | +-------------------------------------------------+ |
9160
- | | data formatter result | |
9161
- | +-------------------------------------------------+ |
9162
- | | 1234 XXXX 4D2 | |
9163
- | +-------------------------------------------------+ |
9164
- +--------------------------------------------------------------------------------------------------+
9165
-
9166
- * Formatter for Date types:
9167
- +--------------------------------------------------------------------------------------------------+
9168
- | FORMATTER DESCRIPTION |
9169
- +--------------------------------------------------------------------------------------------------+
9170
- | - |
9171
- | / |
9172
- | , Punctuation characters are ignored and text enclosed in |
9173
- | . quotation marks is ignored. |
9174
- | ; |
9175
- | : |
9176
- | "text" |
9177
- | Example: |
9178
- | +-------------------------------------------------+ |
9179
- | | data formatter result | |
9180
- | +-------------------------------------------------+ |
9181
- | | 03/09/17 MM-DD 09-17 | |
9182
- | +-------------------------------------------------+ |
9183
- +--------------------------------------------------------------------------------------------------+
9184
- | AD AD indicator. |
9185
- | A.D. |
9186
- | Example: |
9187
- | +-------------------------------------------------+ |
9188
- | | data formatter result | |
9189
- | +-------------------------------------------------+ |
9190
- | | 03/09/17 CCAD 21AD | |
9191
- | +-------------------------------------------------+ |
9192
- +--------------------------------------------------------------------------------------------------+
9193
- | AM Meridian indicator. |
9194
- | A.M. |
9195
- | Example: |
9196
- | +-------------------------------------------------+ |
9197
- | | data formatter result | |
9198
- | +-------------------------------------------------+ |
9199
- | | 03/09/17 CCAM 21AM | |
9200
- | +-------------------------------------------------+ |
9201
- +--------------------------------------------------------------------------------------------------+
9202
- | BC |
9203
- | B.C. BC indicator. |
9204
- | Example: |
9205
- | +-------------------------------------------------+ |
9206
- | | data formatter result | |
9207
- | +-------------------------------------------------+ |
9208
- | | 03/09/17 CCBC 21BC | |
9209
- | +-------------------------------------------------+ |
9210
- +--------------------------------------------------------------------------------------------------+
9211
- | CC Century. |
9212
- | SCC If the last 2 digits of a 4-digit year are between 01 |
9213
- | and 99 inclusive, the century is 1 greater than the |
9214
- | first 2 digits of that year. |
9215
- | If the last 2 digits of a 4-digit year are 00, the |
9216
- | century is the same as the first 2 digits of that year. |
9217
- | Example: |
9218
- | +-------------------------------------------------+ |
9219
- | | data formatter result | |
9220
- | +-------------------------------------------------+ |
9221
- | | 03/09/17 CCBC 21BC | |
9222
- | +-------------------------------------------------+ |
9223
- +--------------------------------------------------------------------------------------------------+
9224
- | D Day of week (1-7). |
9225
- | Example: |
9226
- | +-------------------------------------------------+ |
9227
- | | data formatter result | |
9228
- | +-------------------------------------------------+ |
9229
- | | 03/09/17 D 4 | |
9230
- | +-------------------------------------------------+ |
9231
- +--------------------------------------------------------------------------------------------------+
9232
- | DAY Name of day. |
9233
- | Example: |
9234
- | +-------------------------------------------------+ |
9235
- | | data formatter result | |
9236
- | +-------------------------------------------------+ |
9237
- | | 03/09/17 DAY WEDNESDAY | |
9238
- | +-------------------------------------------------+ |
9239
- +--------------------------------------------------------------------------------------------------+
9240
- | DD Day of month (1-31). |
9241
- | Example: |
9242
- | +-------------------------------------------------+ |
9243
- | | data formatter result | |
9244
- | +-------------------------------------------------+ |
9245
- | | 03/09/17 DD 17 | |
9246
- | +-------------------------------------------------+ |
9247
- +--------------------------------------------------------------------------------------------------+
9248
- | DDD Day of year (1-366). |
9249
- | Example: |
9250
- | +-------------------------------------------------+ |
9251
- | | data formatter result | |
9252
- | +-------------------------------------------------+ |
9253
- | | 03/09/17 DDD 260 | |
9254
- | +-------------------------------------------------+ |
9255
- +--------------------------------------------------------------------------------------------------+
9256
- | DL Date Long. Equivalent to the format string ‘FMDay, |
9257
- | Month FMDD, YYYY’. |
9258
- | Example: |
9259
- | +-------------------------------------------------+ |
9260
- | | data formatter result | |
9261
- | +-------------------------------------------------+ |
9262
- | | 03/09/17 DL Wednesday, September 17, 2003| |
9263
- | +-------------------------------------------------+ |
9264
- +--------------------------------------------------------------------------------------------------+
9265
- | DS Date Short. Equivalent to the format string |
9266
- | ‘FMMM/DD/YYYYFM’. |
9267
- | Example: |
9268
- | +-------------------------------------------------+ |
9269
- | | data formatter result | |
9270
- | +-------------------------------------------------+ |
9271
- | | 03/09/17 DS 9/17/2003 | |
9272
- | +-------------------------------------------------+ |
9273
- +--------------------------------------------------------------------------------------------------+
9274
- | DY abbreviated name of day. |
9275
- | Example: |
9276
- | +-------------------------------------------------+ |
9277
- | | data formatter result | |
9278
- | +-------------------------------------------------+ |
9279
- | | 03/09/17 DY WED | |
9280
- | +-------------------------------------------------+ |
9281
- +--------------------------------------------------------------------------------------------------+
9282
- | FF [1..9] Fractional seconds. |
9283
- | Use [1..9] to specify the number of fractional digits. |
9284
- | FF without any number following it prints a decimal |
9285
- | followed by digits equal to the number of fractional |
9286
- | seconds in the input data type. If the data type has no |
9287
- | fractional digits, FF prints nothing. |
9288
- | Any fractional digits beyond 6 digits are truncated. |
9289
- | Example: |
9290
- | +-------------------------------------------------+ |
9291
- | | data formatter result | |
9292
- | +-------------------------------------------------+ |
9293
- | | 2016-01-06 09:08:01.000000 FF 000000 | |
9294
- | +-------------------------------------------------+ |
9295
- +--------------------------------------------------------------------------------------------------+
9296
- | HH |
9297
- | HH12 Hour of day (1-12). |
9298
- | Example: |
9299
- | +-------------------------------------------------+ |
9300
- | | data formatter result | |
9301
- | +-------------------------------------------------+ |
9302
- | | 2016-01-06 09:08:01.000000 HH 09 | |
9303
- | +-------------------------------------------------+ |
9304
- +--------------------------------------------------------------------------------------------------+
9305
- | HH24 Hour of the day (0-23). |
9306
- | Example: |
9307
- | +-------------------------------------------------+ |
9308
- | | data formatter result | |
9309
- | +-------------------------------------------------+ |
9310
- | | 2016-01-06 09:08:01.000000 HH24 09 | |
9311
- | +-------------------------------------------------+ |
9312
- +--------------------------------------------------------------------------------------------------+
9313
- | IW Week of year (1-52 or 1-53) based on ISO model. |
9314
- | Example: |
9315
- | +-------------------------------------------------+ |
9316
- | | data formatter result | |
9317
- | +-------------------------------------------------+ |
9318
- | | 2016-01-06 09:08:01.000000 IW 01 | |
9319
- | +-------------------------------------------------+ |
9320
- +--------------------------------------------------------------------------------------------------+
9321
- | IYY |
9322
- | IY Last 3, 2, or 1 digits of ISO year. |
9323
- | I |
9324
- | Example: |
9325
- | +-------------------------------------------------+ |
9326
- | | data formatter result | |
9327
- | +-------------------------------------------------+ |
9328
- | | 2016-01-06 09:08:01.000000 IY 16 | |
9329
- | +-------------------------------------------------+ |
9330
- +--------------------------------------------------------------------------------------------------+
9331
- | IYYY 4-digit year based on the ISO standard. |
9332
- | Example: |
9333
- | +-------------------------------------------------+ |
9334
- | | data formatter result | |
9335
- | +-------------------------------------------------+ |
9336
- | | 2016-01-06 09:08:01.000000 IYYY 2016 | |
9337
- | +-------------------------------------------------+ |
9338
- +--------------------------------------------------------------------------------------------------+
9339
- | J Julian day, the number of days since January 1, 4713 BC. |
9340
- | Number specified with J must be integers. |
9341
- | Teradata uses the Gregorian calendar in calculations to |
9342
- | and from Julian Days. |
9343
- | Example: |
9344
- | +-------------------------------------------------+ |
9345
- | | data formatter result | |
9346
- | +-------------------------------------------------+ |
9347
- | | 2016-01-06 09:08:01.000000 J 2457394 | |
9348
- | +-------------------------------------------------+ |
9349
- +--------------------------------------------------------------------------------------------------+
9350
- | MI Minute (0-59). |
9351
- | Example: |
9352
- | +-------------------------------------------------+ |
9353
- | | data formatter result | |
9354
- | +-------------------------------------------------+ |
9355
- | | 2016-01-06 09:08:01.000000 MI 08 | |
9356
- | +-------------------------------------------------+ |
9357
- +--------------------------------------------------------------------------------------------------+
9358
- | MM Month (01-12). |
9359
- | Example: |
9360
- | +-------------------------------------------------+ |
9361
- | | data formatter result | |
9362
- | +-------------------------------------------------+ |
9363
- | | 2016-01-06 09:08:01.000000 MM 01 | |
9364
- | +-------------------------------------------------+ |
9365
- +--------------------------------------------------------------------------------------------------+
9366
- | MON Abbreviated name of month. |
9367
- | Example: |
9368
- | +-------------------------------------------------+ |
9369
- | | data formatter result | |
9370
- | +-------------------------------------------------+ |
9371
- | | 2016-01-06 09:08:01.000000 MON JAN | |
9372
- | +-------------------------------------------------+ |
9373
- +--------------------------------------------------------------------------------------------------+
9374
- | MONTH Name of month. |
9375
- | Example: |
9376
- | +-------------------------------------------------+ |
9377
- | | data formatter result | |
9378
- | +-------------------------------------------------+ |
9379
- | | 2016-01-06 09:08:01.000000 MONTH JANUARY | |
9380
- | +-------------------------------------------------+ |
9381
- +--------------------------------------------------------------------------------------------------+
9382
- | PM |
9383
- | P.M. Meridian indicator. |
9384
- | Example: |
9385
- | +-------------------------------------------------+ |
9386
- | | data formatter result | |
9387
- | +-------------------------------------------------+ |
9388
- | | 2016-01-06 09:08:01.000000 HHPM 09PM | |
9389
- | +-------------------------------------------------+ |
9390
- +--------------------------------------------------------------------------------------------------+
9391
- | Q Quarter of year (1, 2, 3, 4). |
9392
- | Example: |
9393
- | +-------------------------------------------------+ |
9394
- | | data formatter result | |
9395
- | +-------------------------------------------------+ |
9396
- | | 2016-01-06 09:08:01.000000 Q 1 | |
9397
- | +-------------------------------------------------+ |
9398
- +--------------------------------------------------------------------------------------------------+
9399
- | RM Roman numeral month (I - XII). |
9400
- | Example: |
9401
- | +-------------------------------------------------+ |
9402
- | | data formatter result | |
9403
- | +-------------------------------------------------+ |
9404
- | | 2016-01-06 09:08:01.000000 RM I | |
9405
- | +-------------------------------------------------+ |
9406
- +--------------------------------------------------------------------------------------------------+
9407
- | SP Spelled. Any numeric element followed by SP is spelled in|
9408
- | English words. The words are capitalized according to how|
9409
- | the element is capitalized. |
9410
- | For example: 'DDDSP' specifies all uppercase, 'DddSP' |
9411
- | specifies that the first letter is capitalized, and |
9412
- | 'dddSP' specifies all lowercase. |
9413
- | Example: |
9414
- | +-------------------------------------------------+ |
9415
- | | data formatter result | |
9416
- | +-------------------------------------------------+ |
9417
- | | 2016-01-06 09:08:01.000000 HHSP NINE | |
9418
- | +-------------------------------------------------+ |
9419
- +--------------------------------------------------------------------------------------------------+
9420
- | SS Second (0-59). |
9421
- | Example: |
9422
- | +-------------------------------------------------+ |
9423
- | | data formatter result | |
9424
- | +-------------------------------------------------+ |
9425
- | | 2016-01-06 09:08:01.000000 SS 03 | |
9426
- | +-------------------------------------------------+ |
9427
- +--------------------------------------------------------------------------------------------------+
9428
- | SSSSS Seconds past midnight (0-86399). |
9429
- | Example: |
9430
- | +-------------------------------------------------+ |
9431
- | | data formatter result | |
9432
- | +-------------------------------------------------+ |
9433
- | | 2016-01-06 09:08:01.000000 SSSSS 32883 | |
9434
- | +-------------------------------------------------+ |
9435
- +--------------------------------------------------------------------------------------------------+
9436
- | TS Time Short. Equivalent to the format string |
9437
- | 'HH:MI:SS AM'. |
9438
- | Example: |
9439
- | +-------------------------------------------------+ |
9440
- | | data formatter result | |
9441
- | +-------------------------------------------------+ |
9442
- | | 2016-01-06 09:08:01.000000 TS 09:08:01 AM | |
9443
- | +-------------------------------------------------+ |
9444
- +--------------------------------------------------------------------------------------------------+
9445
- | TZH Time zone hour. |
9446
- | Example: |
9447
- | +-------------------------------------------------+ |
9448
- | | data formatter result | |
9449
- | +-------------------------------------------------+ |
9450
- | | 2016-01-06 09:08:01.000000 TZH +00 | |
9451
- | +-------------------------------------------------+ |
9452
- +--------------------------------------------------------------------------------------------------+
9453
- | TZM Time zone minute. |
9454
- | Example: |
9455
- | +-------------------------------------------------+ |
9456
- | | data formatter result | |
9457
- | +-------------------------------------------------+ |
9458
- | | 2016-01-06 09:08:01.000000 TZM 00 | |
9459
- | +-------------------------------------------------+ |
9460
- +--------------------------------------------------------------------------------------------------+
9461
- | TZR Time zone region. Equivalent to the format string |
9462
- | 'TZH:TZM'. |
9463
- | Example: |
9464
- | +-------------------------------------------------+ |
9465
- | | data formatter result | |
9466
- | +-------------------------------------------------+ |
9467
- | | 2016-01-06 09:08:01.000000 TZR +00:00 | |
9468
- | +-------------------------------------------------+ |
9469
- +--------------------------------------------------------------------------------------------------+
9470
- | WW Week of year (1-53) where week 1 starts on the first day |
9471
- | of the year and continues to the 7th day of the year. |
9472
- | Example: |
9473
- | +-------------------------------------------------+ |
9474
- | | data formatter result | |
9475
- | +-------------------------------------------------+ |
9476
- | | 2016-01-06 09:08:01.000000 WW 01 | |
9477
- | +-------------------------------------------------+ |
9478
- +--------------------------------------------------------------------------------------------------+
9479
- | W Week of month (1-5) where week 1 starts on the first day |
9480
- | of the month and ends on the seventh. |
9481
- | Example: |
9482
- | +-------------------------------------------------+ |
9483
- | | data formatter result | |
9484
- | +-------------------------------------------------+ |
9485
- | | 2016-01-06 09:08:01.000000 W 1 | |
9486
- | +-------------------------------------------------+ |
9487
- +--------------------------------------------------------------------------------------------------+
9488
- | X Local radix character. |
9489
- | Example: |
9490
- | +-------------------------------------------------+ |
9491
- | | data formatter result | |
9492
- | +-------------------------------------------------+ |
9493
- | | 2016-01-06 09:08:01.000000 MMXYY 01.16 | |
9494
- | +-------------------------------------------------+ |
9495
- +--------------------------------------------------------------------------------------------------+
9496
- | Y,YYY Year with comma in this position. |
9497
- | Example: |
9498
- | +-------------------------------------------------+ |
9499
- | | data formatter result | |
9500
- | +-------------------------------------------------+ |
9501
- | | 2016-01-06 09:08:01.000000 Y,YYY 2,016 | |
9502
- | +-------------------------------------------------+ |
9503
- +--------------------------------------------------------------------------------------------------+
9504
- | YEAR Year, spelled out. S prefixes BC dates with a minus sign.|
9505
- | SYEAR |
9506
- | Example: |
9507
- | +-------------------------------------------------+ |
9508
- | | data formatter result | |
9509
- | +-------------------------------------------------+ |
9510
- | | 2016-01-06 09:08:01.000000 YEAR TWENTY SIXTEEN| |
9511
- | +-------------------------------------------------+ |
9512
- +--------------------------------------------------------------------------------------------------+
9513
- | YYYY |
9514
- | SYYYY 4-digit year. S prefixes BC dates with a minus sign. |
9515
- | Example: |
9516
- | +-------------------------------------------------+ |
9517
- | | data formatter result | |
9518
- | +-------------------------------------------------+ |
9519
- | | 2016-01-06 09:08:01.000000 YYYY 2016 | |
9520
- | +-------------------------------------------------+ |
9521
- +--------------------------------------------------------------------------------------------------+
9522
- | YYY Last 3, 2, or 1 digit of year. |
9523
- | YY If the current year and the specified year are both in |
9524
- | Y the range of 0-49, the date is in the current century. |
9525
- | Example: |
9526
- | +-------------------------------------------------+ |
9527
- | | data formatter result | |
9528
- | +-------------------------------------------------+ |
9529
- | | 2016-01-06 09:08:01.000000 YY 16 | |
9530
- | +-------------------------------------------------+ |
9531
- +--------------------------------------------------------------------------------------------------+
9532
9359
 
9533
9360
  RAISES:
9534
9361
  TypeError, ValueError, TeradataMlException
@@ -9654,156 +9481,11 @@ class _SQLColumnExpression(_LogicalColumnExpression,
9654
9481
  Specifies a variable length string containing formatting characters
9655
9482
  that define the format of the columns.
9656
9483
  Type: str OR ColumnExpression
9657
- Note:
9484
+ Notes:
9658
9485
  * If 'formatter' is omitted, a numeric value is converted to a string exactly
9659
9486
  long enough to hold its significant digits.
9660
-
9661
- * Formatters:
9662
- +--------------------------------------------------------------------------------------------------+
9663
- | FORMATTER DESCRIPTION |
9664
- +--------------------------------------------------------------------------------------------------+
9665
- | , (comma) A comma in the specified position. |
9666
- | A comma cannot begin a number format. |
9667
- | A comma cannot appear to the right of a decimal |
9668
- | character or period in a number format. |
9669
- | Example: |
9670
- | +-------------------------------------------------+ |
9671
- | | data formatter result | |
9672
- | +-------------------------------------------------+ |
9673
- | | "1,234" "9,999" 1234 | |
9674
- | +-------------------------------------------------+ |
9675
- +--------------------------------------------------------------------------------------------------+
9676
- | . (period) A decimal point. Only one allowed in a format. |
9677
- | Example: |
9678
- | +-------------------------------------------------+ |
9679
- | | data formatter result | |
9680
- | +-------------------------------------------------+ |
9681
- | | "12.34" "99.99" 12.34 | |
9682
- | +-------------------------------------------------+ |
9683
- +--------------------------------------------------------------------------------------------------+
9684
- | $ A value with a leading dollar sign. |
9685
- | Example: |
9686
- | +-------------------------------------------------+ |
9687
- | | data formatter result | |
9688
- | +-------------------------------------------------+ |
9689
- | | "$1234" "$9999" 1234 | |
9690
- | +-------------------------------------------------+ |
9691
- +--------------------------------------------------------------------------------------------------+
9692
- | 0 Leading or trailing zeros. |
9693
- | Example: |
9694
- | +-------------------------------------------------+ |
9695
- | | data formatter result | |
9696
- | +-------------------------------------------------+ |
9697
- | | "0123" "0999" 123 | |
9698
- | | "1230" "9990" 1230 | |
9699
- | +-------------------------------------------------+ |
9700
- +--------------------------------------------------------------------------------------------------+
9701
- | 9 Specified number of digits. |
9702
- | Leading space if positive, minus if negative. |
9703
- | Example: |
9704
- | +-------------------------------------------------+ |
9705
- | | data formatter result | |
9706
- | +-------------------------------------------------+ |
9707
- | | "1234" "9999" 1234 | |
9708
- | | "-1234" "9999" -1234 | |
9709
- | +-------------------------------------------------+ |
9710
- +--------------------------------------------------------------------------------------------------+
9711
- | B Blanks if integer part is zero. |
9712
- | Example: |
9713
- | +-------------------------------------------------+ |
9714
- | | data formatter result | |
9715
- | +-------------------------------------------------+ |
9716
- | | "0" "B9999" 0 | |
9717
- | +-------------------------------------------------+ |
9718
- +--------------------------------------------------------------------------------------------------+
9719
- | C ISO currency symbol (from SDF ISOCurrency). |
9720
- | Example: |
9721
- | +-------------------------------------------------+ |
9722
- | | data formatter result | |
9723
- | +-------------------------------------------------+ |
9724
- | | "USD123" "C999" 123 | |
9725
- | +-------------------------------------------------+ |
9726
- +--------------------------------------------------------------------------------------------------+
9727
- | D Radix separator for non-monetary values. |
9728
- | From SDF RadixSeparator. |
9729
- | Example: |
9730
- | +-------------------------------------------------+ |
9731
- | | data formatter result | |
9732
- | +-------------------------------------------------+ |
9733
- | | "12.34" "99D99" 12.34 | |
9734
- | +-------------------------------------------------+ |
9735
- +--------------------------------------------------------------------------------------------------+
9736
- | EEEE Scientific notation. |
9737
- | Example: |
9738
- | +-------------------------------------------------+ |
9739
- | | data formatter result | |
9740
- | +-------------------------------------------------+ |
9741
- | | "1.2E+04" "9.9EEEE" 12000 | |
9742
- | +-------------------------------------------------+ |
9743
- +--------------------------------------------------------------------------------------------------+
9744
- | G Group separator for non-monetary values. |
9745
- | From SDF GroupSeparator. |
9746
- | Example: |
9747
- | +-------------------------------------------------+ |
9748
- | | data formatter result | |
9749
- | +-------------------------------------------------+ |
9750
- | | "1,234,567" "9G999G999" 1234567 | |
9751
- | +-------------------------------------------------+ |
9752
- +--------------------------------------------------------------------------------------------------+
9753
- | L Local currency (from SDF Currency element). |
9754
- | Example: |
9755
- | +-------------------------------------------------+ |
9756
- | | data formatter result | |
9757
- | +-------------------------------------------------+ |
9758
- | | "$123" "L999" 123 | |
9759
- | +-------------------------------------------------+ |
9760
- +--------------------------------------------------------------------------------------------------+
9761
- | MI Trailing minus sign if value is negative. |
9762
- | Can only appear in the last position. |
9763
- | Example: |
9764
- | +-------------------------------------------------+ |
9765
- | | data formatter result | |
9766
- | +-------------------------------------------------+ |
9767
- | | "1234-" "9999MI" -1234 | |
9768
- | +-------------------------------------------------+ |
9769
- +--------------------------------------------------------------------------------------------------+
9770
- | PR Negative value in angle brackets. |
9771
- | Positive value with leading/trailing blank. |
9772
- | Only in the last position. |
9773
- | Example: |
9774
- | +-------------------------------------------------+ |
9775
- | | data formatter result | |
9776
- | +-------------------------------------------------+ |
9777
- | | " 123 " "9999PR" 123 | |
9778
- | +-------------------------------------------------+ |
9779
- +--------------------------------------------------------------------------------------------------+
9780
- | S Sign indicator: + / - at beginning or end. |
9781
- | Can only appear in first or last position. |
9782
- | Example: |
9783
- | +-------------------------------------------------+ |
9784
- | | data formatter result | |
9785
- | +-------------------------------------------------+ |
9786
- | | "-1234" "S9999" -1234 | |
9787
- | +-------------------------------------------------+ |
9788
- +--------------------------------------------------------------------------------------------------+
9789
- | U Dual currency (from SDF DualCurrency). |
9790
- | Example: |
9791
- | +-------------------------------------------------+ |
9792
- | | data formatter result | |
9793
- | +-------------------------------------------------+ |
9794
- | | "$123" "U999" 123 | |
9795
- | +-------------------------------------------------+ |
9796
- +--------------------------------------------------------------------------------------------------+
9797
- | X Hexadecimal format. |
9798
- | Accepts only non-negative values. |
9799
- | Must be preceded by 0 or FM. |
9800
- | Example: |
9801
- | +-------------------------------------------------+ |
9802
- | | data formatter result | |
9803
- | +-------------------------------------------------+ |
9804
- | | "FF" "XX" 255 | |
9805
- | +-------------------------------------------------+ |
9806
- +--------------------------------------------------------------------------------------------------+
9487
+ * Get the supported formatters using `get_formatters("NUMERIC")` function.
9488
+
9807
9489
  RAISES:
9808
9490
  TypeError, ValueError, TeradataMlException
9809
9491
 
@@ -9877,213 +9559,10 @@ class _SQLColumnExpression(_LogicalColumnExpression,
9877
9559
  Specifies a variable length string containing formatting characters
9878
9560
  that define the format of column.
9879
9561
  Type: str
9880
- Note:
9562
+ Notes:
9881
9563
  * If "formatter" is omitted, the following default date format is used: 'YYYY-MM-DD'
9882
- * formatter for date type:
9883
- +--------------------------------------------------------------------------------------------------+
9884
- | FORMATTER DESCRIPTION |
9885
- +--------------------------------------------------------------------------------------------------+
9886
- | - |
9887
- | / |
9888
- | , Punctuation characters are ignored and text enclosed in |
9889
- | . quotation marks is ignored. |
9890
- | ; |
9891
- | : |
9892
- | "text" |
9893
- | Example: Date with value '2003-12-10' |
9894
- | +-------------------------------------------------+ |
9895
- | | data formatter value | |
9896
- | +-------------------------------------------------+ |
9897
- | | '2003-12-10' YYYY-MM-DD 03/12/10 | |
9898
- | +-------------------------------------------------+ |
9899
- +--------------------------------------------------------------------------------------------------+
9900
- | D Day of week (1-7). |
9901
- | Example: day of week with value '2' |
9902
- | +-------------------------------------------------+ |
9903
- | | data formatter value | |
9904
- | +-------------------------------------------------+ |
9905
- | | 2 D 24/01/01 | |
9906
- | +-------------------------------------------------+ |
9907
- +--------------------------------------------------------------------------------------------------+
9908
- | DAY Name of day. |
9909
- | Example: Date with value '2024-TUESDAY-01-30' |
9910
- | +-------------------------------------------------+ |
9911
- | | data formatter value | |
9912
- | +-------------------------------------------------+ |
9913
- | | 2024-TUESDAY-01-30 YYYY-DAY-MM-DD 24/01/30 | |
9914
- | +-------------------------------------------------+ |
9915
- +--------------------------------------------------------------------------------------------------+
9916
- | DD Day of month (1-31). |
9917
- | Example: Date with value '2003-10-25' |
9918
- | +-------------------------------------------------+ |
9919
- | | data formatter value | |
9920
- | +-------------------------------------------------+ |
9921
- | | 2003-10-25 YYYY-MM-DD 03/10/25 | |
9922
- | +-------------------------------------------------+ |
9923
- +--------------------------------------------------------------------------------------------------+
9924
- | DDD Day of year (1-366). |
9925
- | Example: Date with value '2024-366' |
9926
- | +-------------------------------------------------+ |
9927
- | | data formatter value | |
9928
- | +-------------------------------------------------+ |
9929
- | | 2024-366 YYYY-DDD 24/12/31 | |
9930
- | +-------------------------------------------------+ |
9931
- +--------------------------------------------------------------------------------------------------+
9932
- | DY abbreviated name of day. |
9933
- | Example: Date with value '2024-Mon-01-29' |
9934
- | +-------------------------------------------------+ |
9935
- | | data formatter value | |
9936
- | +-------------------------------------------------+ |
9937
- | | 2024-Mon-01-29 YYYY-DY-MM-DD 24/01/29 | |
9938
- | +-------------------------------------------------+ |
9939
- +--------------------------------------------------------------------------------------------------+
9940
- | HH |
9941
- | HH12 Hour of day (1-12). |
9942
- | Example: Date with value '2016-01-06 09:08:01' |
9943
- | +-------------------------------------------------+ |
9944
- | | data formatter value | |
9945
- | +-------------------------------------------------+ |
9946
- | | 2016-01-06 09:08:01 YYYY-MM-DD HH:MI:SS 6/01/06| |
9947
- | +-------------------------------------------------+ |
9948
- +--------------------------------------------------------------------------------------------------+
9949
- | HH24 Hour of the day (0-23). |
9950
- | Example: Date with value '2016-01-06 23:08:01' |
9951
- | +----------------------------------------------------+ |
9952
- | | data formatter value | |
9953
- | +----------------------------------------------------+ |
9954
- | | 2016-01-06 23:08:01 YYYY-MM-DD HH24:MI:SS 6/01/06 | |
9955
- | +----------------------------------------------------+ |
9956
- +--------------------------------------------------------------------------------------------------+
9957
- | J Julian day, the number of days since January 1, 4713 BC. |
9958
- | Number specified with J must be integers. |
9959
- | Teradata uses the Gregorian calendar in calculations to |
9960
- | and from Julian Days. |
9961
- | Example: Number of julian days with value '2457394' |
9962
- | +-------------------------------------------------+ |
9963
- | | data formatter value | |
9964
- | +-------------------------------------------------+ |
9965
- | | 2457394 J 16/01/06 | |
9966
- | +-------------------------------------------------+ |
9967
- +--------------------------------------------------------------------------------------------------+
9968
- | MI Minute (0-59). |
9969
- | Example: Date with value '2016-01-06 23:08:01' |
9970
- | +----------------------------------------------------+ |
9971
- | | data formatter value | |
9972
- | +----------------------------------------------------+ |
9973
- | | 2016-01-06 23:08:01 YYYY-MM-DD HH24:MI:SS 6/01/06 | |
9974
- | +----------------------------------------------------+ |
9975
- +--------------------------------------------------------------------------------------------------+
9976
- | MM Month (01-12). |
9977
- | Example: Date with value '2016-01-06 23:08:01' |
9978
- | +----------------------------------------------------+ |
9979
- | | data formatter value | |
9980
- | +----------------------------------------------------+ |
9981
- | | 2016-01-06 23:08:01 YYYY-MM-DD HH24:MI:SS 6/01/06 | |
9982
- | +----------------------------------------------------+ |
9983
- +--------------------------------------------------------------------------------------------------+
9984
- | MON Abbreviated name of month. |
9985
- | Example: Date with value '2016-JAN-06' |
9986
- | +----------------------------------------------------+ |
9987
- | | data formatter value | |
9988
- | +----------------------------------------------------+ |
9989
- | | 2016-JAN-06 YYYY-MON-DD 16/01/06 | |
9990
- | +----------------------------------------------------+ |
9991
- +--------------------------------------------------------------------------------------------------+
9992
- | MONTH Name of month. |
9993
- | Example: Date with value '2016-JANUARY-06' |
9994
- | +-------------------------------------------------+ |
9995
- | | data formatter value | |
9996
- | +-------------------------------------------------+ |
9997
- | | 2016-JANUARY-06 YYYY-MONTH-DD 16/01/06 | |
9998
- | +-------------------------------------------------+ |
9999
- +--------------------------------------------------------------------------------------------------+
10000
- | PM |
10001
- | P.M. Meridian indicator. |
10002
- | Example: Date with value '2016-01-06 23:08:01 PM' |
10003
- | +---------------------------------------------------------+ |
10004
- | | data formatter value | |
10005
- | +---------------------------------------------------------+ |
10006
- | | 2016-01-06 23:08:01 PM YYYY-MM-DD HH24:MI:SS PM 16/01/06| |
10007
- | +---------------------------------------------------------+ |
10008
- +--------------------------------------------------------------------------------------------------+
10009
- | RM Roman numeral month (I - XII). |
10010
- | Example: Date with value '2024-XII' |
10011
- | +-------------------------------------------------+ |
10012
- | | data formatter value | |
10013
- | +-------------------------------------------------+ |
10014
- | | 2024-XII YYYY-RM 24/12/01 | |
10015
- | +-------------------------------------------------+ |
10016
- +--------------------------------------------------------------------------------------------------+
10017
- | RR Stores 20th century dates in the 21st century using only |
10018
- | 2 digits. If the current year and the specified year are |
10019
- | both in the range of 0-49, the date is in the current |
10020
- | century. |
10021
- | Example: Date with value '2024-365, 21' |
10022
- | +-------------------------------------------------+ |
10023
- | | data formatter value | |
10024
- | +-------------------------------------------------+ |
10025
- | | 2024-365, 21 YYYY-DDD, RR 21/12/31 | |
10026
- | +-------------------------------------------------+ |
10027
- +--------------------------------------------------------------------------------------------------+
10028
- | RRRR Round year. Accepts either 4-digit or 2-digit input. |
10029
- | 2-digit input provides the same return as RR. |
10030
- | Example: Date with value '2024-365, 21' |
10031
- | +-------------------------------------------------+ |
10032
- | | data formatter value | |
10033
- | +-------------------------------------------------+ |
10034
- | | 2024-365, 21 YYYY-DDD, RRRR 24/12/31 | |
10035
- | +-------------------------------------------------+ |
10036
- +--------------------------------------------------------------------------------------------------+
10037
- | SS Second (0-59). |
10038
- | Example: Date with value '2016-01-06 23:08:01' |
10039
- | +----------------------------------------------------+ |
10040
- | | data formatter value | |
10041
- | +----------------------------------------------------+ |
10042
- | | 2016-01-06 23:08:01 YYYY-MM-DD HH24:MI:SS 6/01/06 | |
10043
- | +----------------------------------------------------+ |
10044
- +--------------------------------------------------------------------------------------------------+
10045
- | SSSSS Seconds past midnight (0-86399). |
10046
- +--------------------------------------------------------------------------------------------------+
10047
- | TZH Time zone hour. |
10048
- +--------------------------------------------------------------------------------------------------+
10049
- | TZM Time zone minute. |
10050
- +--------------------------------------------------------------------------------------------------+
10051
- | X Local radix character. |
10052
- | Example: Date with value '2024.366' |
10053
- | +-------------------------------------------------+ |
10054
- | | data formatter value | |
10055
- | +-------------------------------------------------+ |
10056
- | | 2024.366 YYYYXDDD 24/12/31 | |
10057
- | +-------------------------------------------------+ |
10058
- +--------------------------------------------------------------------------------------------------+
10059
- | Y,YYY Year with comma in this position. |
10060
- | Example: Date with value '2,024-366' |
10061
- | +-------------------------------------------------+ |
10062
- | | data formatter value | |
10063
- | +-------------------------------------------------+ |
10064
- | | 2,024-366 Y,YYY-DDD 24/12/31 | |
10065
- | +-------------------------------------------------+ |
10066
- +--------------------------------------------------------------------------------------------------+
10067
- | YYYY |
10068
- | SYYYY 4-digit year. S prefixes BC dates with a minus sign. |
10069
- | Example: Date with value '2024-366' |
10070
- | +-------------------------------------------------+ |
10071
- | | data formatter value | |
10072
- | +-------------------------------------------------+ |
10073
- | | 2024-366 YYYY-DDD 24/12/31 | |
10074
- | +-------------------------------------------------+ |
10075
- +--------------------------------------------------------------------------------------------------+
10076
- | YYY Last 3, 2, or 1 digit of year. |
10077
- | YY If the current year and the specified year are both in |
10078
- | Y the range of 0-49, the date is in the current century. |
10079
- | Example: Date with value '24-366' |
10080
- | +-------------------------------------------------+ |
10081
- | | data formatter value | |
10082
- | +-------------------------------------------------+ |
10083
- | | 24-366 YY-DDD 24/12/31 | |
10084
- | +-------------------------------------------------+ |
10085
- +--------------------------------------------------------------------------------------------------+
10086
-
9564
+ * Get the supported formatters using `get_formatters("DATE")` function.
9565
+
10087
9566
  RAISES:
10088
9567
  TypeError, ValueError, TeradataMlException
10089
9568
 
@@ -10159,7 +9638,7 @@ class _SQLColumnExpression(_LogicalColumnExpression,
10159
9638
  if formatter:
10160
9639
  _args.append(formatter)
10161
9640
  return _SQLColumnExpression(func.to_date(*_args), type=DATE())
10162
-
9641
+
10163
9642
  def trunc(self, expression=0, formatter=None):
10164
9643
  """
10165
9644
  DESCRIPTION:
@@ -10908,7 +10387,7 @@ class _SQLColumnExpression(_LogicalColumnExpression,
10908
10387
  value = value.expression if isinstance(value, _SQLColumnExpression) else value
10909
10388
  return _SQLColumnExpression(_fun(self.expression, value), type=type_)
10910
10389
 
10911
- def parse_url(self, url_part):
10390
+ def parse_url(self, url_part, key=None):
10912
10391
  """
10913
10392
  DESCRIPTION:
10914
10393
  Extracts a specific part from the URL.
@@ -10920,6 +10399,13 @@ class _SQLColumnExpression(_LogicalColumnExpression,
10920
10399
  Permitted Values: HOST, PATH, QUERY, REF, PROTOCOL, FILE, AUTHORITY, USERINFO
10921
10400
  Type: str or ColumnExpression
10922
10401
 
10402
+ key:
10403
+ Optional Argument.
10404
+ Specifies the key to be used for extracting the value from the query string.
10405
+ Note:
10406
+ * Applicable only when url_part is set to 'QUERY'.
10407
+ Type: str or ColumnExpression
10408
+
10923
10409
  RETURNS:
10924
10410
  ColumnExpression
10925
10411
 
@@ -10930,43 +10416,96 @@ class _SQLColumnExpression(_LogicalColumnExpression,
10930
10416
  # Create a DataFrame on 'url_data' table.
10931
10417
  >>> df = DataFrame("url_data")
10932
10418
  >>> df
10933
- urls part
10934
- id
10935
- 3 https://www.facebook.com HOST
10936
- 6 smtp://user:password@smtp.example.com:21/file.txt USERINFO
10937
- 4 https://teracloud-pod-services-pod-account-service.dummyvalu QUERY
10938
- 2 https://example.net/path4/path5/path6?query4=value4#fragment REF
10939
- 0 http://example.com:8080/path FILE
10940
- 1 ftp://example.net:21/path PATH
10941
- 5 http://pg.example.ml/path150#fragment90 AUTHORITY
10942
- 7 https://www.google.com PROTOCOL
10943
-
10944
- # Example 1: Extract components from column 'urls' using column 'part'
10419
+ urls part query_key
10420
+ id
10421
+ 3 https://www.facebook.com HOST facebook.com
10422
+ 8 http://example.com/api?query1=value1&query2=value2 QUERY query1
10423
+ 6 smtp://user:password@smtp.example.com:21/file.txt USERINFO password
10424
+ 4 https://teracloud-pod-services-pod-account-service.dummyvalu QUERY None
10425
+ 0 http://example.com:8080/path FILE path
10426
+ 2 https://example.net/path4/path5/path6?query4=value4#fragment REF fragment3
10427
+ 1 ftp://example.net:21/path PATH path
10428
+ 5 http://pg.example.ml/path150#fragment90 AUTHORITY fragment90
10429
+ 7 https://www.google.com PROTOCOL google.com
10430
+
10431
+ # Example 1: Extract components from column 'urls' using column 'part'.
10945
10432
  >>> df.assign(col = df.urls.parse_url(df.part))
10946
- urls part col
10947
- id
10948
- 3 https://www.facebook.com HOST www.facebook.com
10949
- 6 smtp://user:password@smtp.example.com:21/file.txt USERINFO user:password
10950
- 4 https://teracloud-pod-services-pod-account-service.dummyvalu QUERY None
10951
- 2 https://example.net/path4/path5/path6?query4=value4#fragment REF fragment
10952
- 0 http://example.com:8080/path FILE /path
10953
- 1 ftp://example.net:21/path PATH /path
10954
- 5 http://pg.example.ml/path150#fragment90 AUTHORITY pg.example.ml
10955
- 7 https://www.google.com PROTOCOL https
10956
- >>>
10433
+ urls part query_key col
10434
+ id
10435
+ 3 https://www.facebook.com HOST facebook.com www.facebook.com
10436
+ 8 http://example.com/api?query1=value1&query2=value2 QUERY query1 query1=value1&query2=value2
10437
+ 6 smtp://user:password@smtp.example.com:21/file.txt USERINFO password user:password
10438
+ 4 https://teracloud-pod-services-pod-account-service.dummyvalu QUERY None None
10439
+ 0 http://example.com:8080/path FILE path /path
10440
+ 2 https://example.net/path4/path5/path6?query4=value4#fragment REF fragment3 fragment
10441
+ 1 ftp://example.net:21/path PATH path /path
10442
+ 5 http://pg.example.ml/path150#fragment90 AUTHORITY fragment90 pg.example.ml
10443
+ 7 https://www.google.com PROTOCOL google.com https
10444
+
10445
+ # Example 2: Extract components from column 'urls' using 'part' and
10446
+ # 'query_key' column.
10447
+ >>> df.assign(col = df.urls.parse_url(df.part, df.query_key))
10448
+ urls part query_key col
10449
+ id
10450
+ 3 https://www.facebook.com HOST facebook.com None
10451
+ 8 http://example.com/api?query1=value1&query2=value2 QUERY query1 value1
10452
+ 6 smtp://user:password@smtp.example.com:21/file.txt USERINFO password None
10453
+ 4 https://teracloud-pod-services-pod-account-service.dummyvalu QUERY None None
10454
+ 0 http://example.com:8080/path FILE path None
10455
+ 2 https://example.net/path4/path5/path6?query4=value4#fragment REF fragment3 None
10456
+ 1 ftp://example.net:21/path PATH path None
10457
+ 5 http://pg.example.ml/path150#fragment90 AUTHORITY fragment90 None
10458
+ 7 https://www.google.com PROTOCOL google.com None
10459
+
10460
+ # Example 3: Extract the value of query key 'query2' from column 'urls' by passing 'url_part' and 'key' as strings.
10461
+ >>> df.assign(col = df.urls.parse_url('QUERY', 'query2'))
10462
+ urls part query_key col
10463
+ id
10464
+ 3 https://www.facebook.com HOST facebook.com None
10465
+ 8 http://example.com/api?query1=value1&query2=value2 QUERY query1 value2
10466
+ 6 smtp://user:password@smtp.example.com:21/file.txt USERINFO password None
10467
+ 4 https://teracloud-pod-services-pod-account-service.dummyvalu QUERY None None
10468
+ 0 http://example.com:8080/path FILE path None
10469
+ 2 https://example.net/path4/path5/path6?query4=value4#fragment REF fragment3 None
10470
+ 1 ftp://example.net:21/path PATH path None
10471
+ 5 http://pg.example.ml/path150#fragment90 AUTHORITY fragment90 None
10472
+ 7 https://www.google.com PROTOCOL google.com None
10957
10473
  """
10958
10474
 
10959
10475
  # Validating Arguments
10960
10476
  arg_type_matrix = []
10961
10477
  arg_type_matrix.append(["url_part", url_part, False, (str, ColumnExpression), True])
10478
+ arg_type_matrix.append(["key", key, True, (str, ColumnExpression), True])
10962
10479
  _Validators._validate_function_arguments(arg_type_matrix)
10963
10480
 
10481
+ # If key is provided and url_part is QUERY, then use regex to extract the value.
10482
+ if key is not None:
10483
+ query_expr = _SQLColumnExpression(func.regexp_substr(func.regexp_substr(self.expression,
10484
+ '[?&]' + (key.expression if isinstance(key, _SQLColumnExpression) else key) + '=([^&]*)'), '[^=]*$'), type=VARCHAR())
10485
+ # If url_part is a column expression, then use case statement to extract the value.
10486
+ if isinstance(url_part, _SQLColumnExpression):
10487
+ whens = [(url_part == 'HOST', None),
10488
+ (url_part == 'PATH', None ),
10489
+ (url_part == 'QUERY', query_expr),
10490
+ (url_part == 'REF', None),
10491
+ (url_part == 'PROTOCOL', None),
10492
+ (url_part == 'FILE',None),
10493
+ (url_part == 'AUTHORITY', None),
10494
+ (url_part == 'USERINFO', None)]
10495
+
10496
+ from teradataml.dataframe.sql_functions import case
10497
+ return case(whens)
10498
+
10499
+ # If url_part is a string, then return the query expression directly.
10500
+ if isinstance(url_part, str) and url_part == 'QUERY':
10501
+ return query_expr
10502
+
10964
10503
  # Regex pattern used to extract 'url_part' is '^(([^:/?#]+):)?(//([^/?#]*))?([^?#]*)(\?([^#]*))?(#(.*))?'.
10965
10504
  # teradataml does not support regex capture groups, so some parts are extracted by applying 'regexp_replace'
10966
10505
  # first and then 'regexp_substr', or vice versa.
10967
10506
  _part_to_extract_dict = {'HOST': _SQLColumnExpression(
10968
- func.regexp_replace(func.regexp_substr(self.expression, '//([^/?#]*)'), '(//[^/?#]+@)|(//)|(:\d+)', ''),
10969
- type=VARCHAR()),
10507
+ func.regexp_replace(func.regexp_substr(self.expression, '//([^/?#]*)'), '(//[^/?#]+@)|(//)|(:\d+)', ''),
10508
+ type=VARCHAR()),
10970
10509
  'PATH': _SQLColumnExpression(func.regexp_substr(
10971
10510
  func.regexp_replace(self.expression, '^(([^:/?#]+):)?(//([^/?#]*))?', ''),
10972
10511
  '([^?#]*)'), type=VARCHAR()),