teradataml 20.0.0.4__py3-none-any.whl → 20.0.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic. Click here for more details.

Files changed (131) hide show
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +182 -13
  3. teradataml/__init__.py +2 -1
  4. teradataml/_version.py +2 -2
  5. teradataml/analytics/analytic_function_executor.py +8 -13
  6. teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
  7. teradataml/analytics/sqle/__init__.py +16 -1
  8. teradataml/analytics/utils.py +60 -1
  9. teradataml/automl/__init__.py +290 -106
  10. teradataml/automl/autodataprep/__init__.py +471 -0
  11. teradataml/automl/data_preparation.py +29 -10
  12. teradataml/automl/data_transformation.py +11 -0
  13. teradataml/automl/feature_engineering.py +64 -4
  14. teradataml/automl/feature_exploration.py +639 -25
  15. teradataml/automl/model_training.py +1 -1
  16. teradataml/clients/auth_client.py +12 -8
  17. teradataml/clients/keycloak_client.py +165 -0
  18. teradataml/common/constants.py +71 -26
  19. teradataml/common/exceptions.py +32 -0
  20. teradataml/common/messagecodes.py +28 -0
  21. teradataml/common/messages.py +13 -4
  22. teradataml/common/sqlbundle.py +3 -2
  23. teradataml/common/utils.py +345 -45
  24. teradataml/context/context.py +259 -93
  25. teradataml/data/apriori_example.json +22 -0
  26. teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
  27. teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
  28. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
  29. teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
  30. teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
  31. teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
  32. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
  33. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
  34. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
  35. teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
  36. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
  37. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
  38. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
  39. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
  40. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
  41. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
  42. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
  43. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  44. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
  45. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
  46. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
  47. teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
  48. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  49. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
  50. teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
  51. teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
  52. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  53. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
  54. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
  55. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
  56. teradataml/data/jsons/byom/onnxembeddings.json +1 -0
  57. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
  58. teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
  59. teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
  60. teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
  61. teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
  62. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
  63. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
  64. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
  65. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
  66. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
  67. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
  68. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
  69. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
  70. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
  71. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
  72. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
  73. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
  74. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +2 -2
  75. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +1 -1
  76. teradataml/data/ner_dict.csv +8 -0
  77. teradataml/data/ner_input_eng.csv +7 -0
  78. teradataml/data/ner_rule.csv +5 -0
  79. teradataml/data/pattern_matching_data.csv +11 -0
  80. teradataml/data/pos_input.csv +40 -0
  81. teradataml/data/sdk/modelops/modelops_spec.json +101737 -0
  82. teradataml/data/tdnerextractor_example.json +14 -0
  83. teradataml/data/teradataml_example.json +21 -1
  84. teradataml/data/textmorph_example.json +5 -0
  85. teradataml/data/to_num_data.csv +4 -0
  86. teradataml/data/tochar_data.csv +5 -0
  87. teradataml/data/trans_dense.csv +16 -0
  88. teradataml/data/trans_sparse.csv +55 -0
  89. teradataml/data/url_data.csv +10 -9
  90. teradataml/dataframe/copy_to.py +38 -27
  91. teradataml/dataframe/data_transfer.py +61 -45
  92. teradataml/dataframe/dataframe.py +1110 -132
  93. teradataml/dataframe/dataframe_utils.py +73 -27
  94. teradataml/dataframe/functions.py +1070 -9
  95. teradataml/dataframe/sql.py +750 -959
  96. teradataml/dbutils/dbutils.py +33 -13
  97. teradataml/dbutils/filemgr.py +14 -10
  98. teradataml/hyperparameter_tuner/utils.py +4 -2
  99. teradataml/lib/aed_0_1.dll +0 -0
  100. teradataml/opensource/_base.py +12 -157
  101. teradataml/options/configure.py +24 -9
  102. teradataml/scriptmgmt/UserEnv.py +317 -39
  103. teradataml/scriptmgmt/lls_utils.py +456 -135
  104. teradataml/sdk/README.md +79 -0
  105. teradataml/sdk/__init__.py +4 -0
  106. teradataml/sdk/_auth_modes.py +422 -0
  107. teradataml/sdk/_func_params.py +487 -0
  108. teradataml/sdk/_json_parser.py +453 -0
  109. teradataml/sdk/_openapi_spec_constants.py +249 -0
  110. teradataml/sdk/_utils.py +236 -0
  111. teradataml/sdk/api_client.py +897 -0
  112. teradataml/sdk/constants.py +62 -0
  113. teradataml/sdk/modelops/__init__.py +98 -0
  114. teradataml/sdk/modelops/_client.py +406 -0
  115. teradataml/sdk/modelops/_constants.py +304 -0
  116. teradataml/sdk/modelops/models.py +2308 -0
  117. teradataml/sdk/spinner.py +107 -0
  118. teradataml/store/__init__.py +1 -1
  119. teradataml/table_operators/Apply.py +16 -1
  120. teradataml/table_operators/Script.py +20 -1
  121. teradataml/table_operators/query_generator.py +4 -21
  122. teradataml/table_operators/table_operator_util.py +58 -9
  123. teradataml/utils/dtypes.py +4 -2
  124. teradataml/utils/internal_buffer.py +22 -2
  125. teradataml/utils/utils.py +0 -1
  126. teradataml/utils/validators.py +318 -58
  127. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/METADATA +188 -14
  128. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/RECORD +131 -84
  129. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/WHEEL +0 -0
  130. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/top_level.txt +0 -0
  131. {teradataml-20.0.0.4.dist-info → teradataml-20.0.0.6.dist-info}/zip-safe +0 -0
@@ -0,0 +1,138 @@
1
+ def Apriori(data=None, target_column=None, id_column=None, partition_columns=None,
2
+ max_len=2, delimiter=",", is_dense_input=False, patterns_or_rules=None,
3
+ support=0.01, **generic_arguments):
4
+ """
5
+ DESCRIPTION:
6
+ The Apriori() function finds patterns and calculates different statistical metrics to
7
+ understand the influence of the occurrence of a set of items on others.
8
+
9
+ PARAMETERS:
10
+ data:
11
+ Required Argument.
12
+ Specifies the input teradataml DataFrame.
13
+ Types: teradataml DataFrame
14
+
15
+ target_column:
16
+ Required Argument.
17
+ Specifies the input teradataml DataFrame column which contains the data to filter.
18
+ Types: str
19
+
20
+ id_column:
21
+ Optional Argument.
22
+ Specifies the name of the column that uniquely groups the items that are purchased together.
23
+ Applicable only when "is_dense_input" is set to False.
24
+ Types: str
25
+
26
+ partition_columns:
27
+ Optional Argument.
28
+ Specifies the column name(s) in the "data" to partition the input.
29
+ Types: str or list of str
30
+
31
+ max_len:
32
+ Optional Argument.
33
+ Specifies the maximum number of items in the item set.
34
+ "max_len" must be greater than or equal to 1 and less than or equal to 20.
35
+ Default Value: 2
36
+ Types: int
37
+
38
+ delimiter:
39
+ Optional Argument, Required when "is_dense_input" is set to True.
40
+ Specifies a character or string that separates words in the input text.
41
+ Default Value: ","
42
+ Types: str
43
+
44
+
45
+ is_dense_input:
46
+ Optional Argument.
47
+ Specifies whether input data is in dense format or not.
48
+ When set to True, function considers the data is in dense format.
49
+ Otherwise function considers data is not in dense format.
50
+ Default Value: False
51
+ Types: bool
52
+
53
+ patterns_or_rules:
54
+ Optional Argument.
55
+ Specifies whether to emit PATTERNS or RULES as output.
56
+ Permitted Values: "PATTERNS", "RULES"
57
+ Types: str
58
+
59
+ support:
60
+ Optional Argument.
61
+ Specifies the support value (minimum occurrence threshold) of the itemset.
62
+ Default Value: 0.01
63
+ Types: float
64
+
65
+ **generic_arguments:
66
+ Optional Argument.
67
+ Specifies the generic keyword arguments SQLE functions accept. Below are the generic
68
+ keyword arguments:
69
+ persist:
70
+ Optional Argument.
71
+ Specifies whether to persist the results of the function in a table or not.
72
+ When set to True, results are persisted in a table; otherwise, results are
73
+ garbage collected at the end of the session.
74
+ Default Value: False
75
+ Types: bool
76
+
77
+ volatile:
78
+ Optional Argument.
79
+ Specifies whether to put the results of the function in a volatile table or not.
80
+ When set to True, results are stored in a volatile table; otherwise not.
81
+ Default Value: False
82
+ Types: bool
83
+
84
+ Function allows the user to partition, hash, order or local order the input
85
+ data. These generic arguments are available for each argument that accepts
86
+ teradataml DataFrame as input and can be accessed as:
87
+ * "<input_data_arg_name>_partition_column" accepts str or list of str (Strings)
88
+ * "<input_data_arg_name>_hash_column" accepts str or list of str (Strings)
89
+ * "<input_data_arg_name>_order_column" accepts str or list of str (Strings)
90
+ * "local_order_<input_data_arg_name>" accepts boolean
91
+ Note:
92
+ These generic arguments are supported by teradataml if the underlying SQLE Engine
93
+ function supports, else an exception is raised.
94
+
95
+ RETURNS:
96
+ Instance of Apriori.
97
+ Output teradataml DataFrames can be accessed using attribute references, such as AprioriObj.<attribute_name>.
98
+ Output teradataml DataFrame attribute name is:
99
+ result
100
+
101
+ RAISES:
102
+ TeradataMlException, TypeError, ValueError
103
+
104
+ EXAMPLES:
105
+ # Notes:
106
+ # 1. Get the connection to Vantage to execute the function.
107
+ # 2. One must import the required functions mentioned in the example from teradataml.
108
+ # 3. Function will raise an error if it is not supported on the Vantage system the user is connected to.
109
+
110
+ # Load the example data.
111
+ load_example_data("apriori", ["trans_dense","trans_sparse"])
112
+
113
+ # Create teradataml DataFrame objects.
114
+ dense_table = DataFrame.from_table("trans_dense")
115
+ sparse_table = DataFrame.from_table("trans_sparse")
116
+
117
+
118
+ # Check the list of available analytic functions.
119
+ display_analytic_functions()
120
+
121
+ # Import function Apriori.
122
+ from teradataml import Apriori
123
+
124
+ # Example 1: Find patterns in the input data with DENSE DATA, PARTITION, RULES.
125
+ Apriori_out = Apriori(data=dense_table, target_column="item",
126
+ partition_columns=["location"], max_len=2,
127
+ patterns_or_rules="rules", support=0.01)
128
+
129
+ # Print the result DataFrame.
130
+ print(Apriori_out.result)
131
+
132
+ # Example 2: Find patterns in the input data with SPARSE DATA, NO PARTITIONS, PATTERNS.
133
+ Apriori_out = Apriori(data=sparse_table, target_column="item",
134
+ id_column="tranid", max_len=3)
135
+
136
+ # Print the result DataFrame.
137
+ print(Apriori_out.result)
138
+ """
@@ -0,0 +1,121 @@
1
+ def NERExtractor(data=None, user_defined_data=None, rules_data=None, text_column=None,
2
+ input_language="EN", show_context=0, accumulate=None,
3
+ **generic_arguments):
4
+ """
5
+ DESCRIPTION:
6
+ NERExtractor() performs Named Entity Recognition (NER) on input text
7
+ according to user-defined dictionary words or regular expression (regex) patterns.
8
+
9
+ PARAMETERS:
10
+ data:
11
+ Required Argument.
12
+ Specifies the input teradataml DataFrame.
13
+ Types: teradataml DataFrame
14
+
15
+ user_defined_data:
16
+ Required Argument.
17
+ Specifies the teradataml DataFrame which contains user defined words and the corresponding entity label.
18
+ Types: teradataml DataFrame
19
+
20
+ rules_data:
21
+ Required Argument.
22
+ Specifies the teradataml DataFrame which contains user-defined regex patterns and the corresponding entity label.
23
+ Types: teradataml DataFrame
24
+
25
+ text_column:
26
+ Required Argument.
27
+ Specifies the name of the teradataml DataFrame column that will be used for NER search.
28
+ Types: str
29
+
30
+ input_language:
31
+ Optional Argument.
32
+ Specifies the language of input text.
33
+ Default Value: "EN"
34
+ Types: str
35
+
36
+ show_context:
37
+ Optional Argument.
38
+ Specifies the number of words before and after the matched entity. If leading or trailing
39
+ words are less than "show_context", then ellipsis (...) are added. Must be a positive value
40
+ less than 10.
41
+ Default Value: 0
42
+ Types: int
43
+
44
+ accumulate:
45
+ Optional Argument.
46
+ Specifies the name(s) of input teradataml DataFrame column(s) to copy to the
47
+ output table.
48
+ Types: str or list of str
49
+
50
+ **generic_arguments:
51
+ Optional Argument.
52
+ Specifies the generic keyword arguments SQLE functions accept. Below are the generic
53
+ keyword arguments:
54
+ persist:
55
+ Optional Argument.
56
+ Specifies whether to persist the results of the function in a table or not.
57
+ When set to True, results are persisted in a table; otherwise, results are
58
+ garbage collected at the end of the session.
59
+ Default Value: False
60
+ Types: bool
61
+
62
+ volatile:
63
+ Optional Argument.
64
+ Specifies whether to put the results of the function in a volatile table or not.
65
+ When set to True, results are stored in a volatile table; otherwise not.
66
+ Default Value: False
67
+ Types: bool
68
+
69
+ Function allows the user to partition, hash, order or local order the input
70
+ data. These generic arguments are available for each argument that accepts
71
+ teradataml DataFrame as input and can be accessed as:
72
+ * "<input_data_arg_name>_partition_column" accepts str or list of str (Strings)
73
+ * "<input_data_arg_name>_hash_column" accepts str or list of str (Strings)
74
+ * "<input_data_arg_name>_order_column" accepts str or list of str (Strings)
75
+ * "local_order_<input_data_arg_name>" accepts boolean
76
+ Note:
77
+ These generic arguments are supported by teradataml if the underlying SQLE Engine
78
+ function supports, else an exception is raised.
79
+
80
+ RETURNS:
81
+ Instance of NERExtractor.
82
+ Output teradataml DataFrames can be accessed using attribute references, such as TDNERExtractorObj.<attribute_name>.
83
+ Output teradataml DataFrame attribute name is:
84
+ result
85
+
86
+ RAISES:
87
+ TeradataMlException, TypeError, ValueError
88
+
89
+ EXAMPLES:
90
+ # Notes:
91
+ # 1. Get the connection to Vantage to execute the function.
92
+ # 2. One must import the required functions mentioned in the example from teradataml.
93
+ # 3. Function will raise an error if it is not supported on the Vantage system the user is connected to.
94
+
95
+ # Load the example data.
96
+ load_example_data("tdnerextractor", ["ner_input_eng", "ner_dict", "ner_rule"])
97
+
98
+ # Create teradataml DataFrame objects.
99
+ df = DataFrame.from_table("ner_input_eng")
100
+ user_defined_words = DataFrame.from_table("ner_dict")
101
+ rules = DataFrame.from_table("ner_rule")
102
+
103
+
104
+ # Check the list of available analytic functions.
105
+ display_analytic_functions()
106
+
107
+ # Import function NERExtractor.
108
+ from teradataml import NERExtractor
109
+
110
+ # Example 1: Perform Named Entity Recognition (NER) using Rules and Dict with Accumulate.
111
+ NER_out = NERExtractor(data=df,
112
+ user_defined_data=user_defined_words,
113
+ rules_data=rules,
114
+ text_column=["txt"],
115
+ input_language="en",
116
+ show_context=3,
117
+ accumulate=["id"])
118
+
119
+ # Print the result DataFrame.
120
+ print(NER_out.result)
121
+ """
@@ -33,7 +33,7 @@ def NGramSplitter(data=None, text_column=None, delimiter=" ", grams=None, overla
33
33
 
34
34
  delimiter:
35
35
  Optional Argument.
36
- Specifies a character or string that separates words in the input text. The
36
+ Specifies a character or string or a regular expression that separates words in the input text. The
37
37
  default value is the set of all whitespace characters which includes
38
38
  the characters for space, tab, newline, carriage return and some
39
39
  others.
@@ -66,14 +66,14 @@ def NGramSplitter(data=None, text_column=None, delimiter=" ", grams=None, overla
66
66
 
67
67
  punctuation:
68
68
  Optional Argument.
69
- Specifies a string that specifies the punctuation characters for the function
69
+ Specifies a string or a regular expression that specifies the punctuation characters for the function
70
70
  to remove before evaluating the input text.
71
71
  Default Value: "`~#^&*()-"
72
72
  Types: str
73
73
 
74
74
  reset:
75
75
  Optional Argument.
76
- Specifies a string that specifies the character or string that ends a sentence.
76
+ Specifies a string or a regular expression that specifies the character or string that ends a sentence.
77
77
  At the end of a sentence, the function discards any partial n-grams and searches
78
78
  for the next n-gram at the beginning of the next sentence. An n-gram
79
79
  cannot span two sentences.
@@ -0,0 +1,212 @@
1
+ def SMOTE(data = None, encoding_data = None, id_column = None,
2
+ response_column = None, input_columns = None, categorical_columns = None,
3
+ median_standard_deviation = None, minority_class = None,
4
+ oversampling_factor = 5, sampling_strategy = "smote",
5
+ fill_sampleid = True, noninput_columns_value = "sample", n_neighbors = 5,
6
+ seed = None, **generic_arguments):
7
+ """
8
+ DESCRIPTION:
9
+ SMOTE() function generates data by oversampling a minority class using
10
+ smote, adasyn, borderline-2 or smote-nc algorithms.
11
+
12
+
13
+ PARAMETERS:
14
+ data:
15
+ Required Argument.
16
+ Specifies the input teradataml DataFrame.
17
+ Types: teradataml DataFrame
18
+
19
+ encoding_data:
20
+ Optional Argument, Required when "sampling_strategy" is set to 'smotenc' algorithm.
21
+ Specifies the teradataml dataframe containing the ordinal encoding information.
22
+ Types: teradataml DataFrame
23
+
24
+ id_column:
25
+ Required Argument.
26
+ Specifies the name of the column in "data" that
27
+ uniquely identifies a data sample.
28
+ Types: str
29
+
30
+ response_column:
31
+ Optional Argument.
32
+ Specifies the name of the column in "data" that contains the
33
+ numeric value to be used as the response value for a sample.
34
+ Types: str
35
+
36
+ input_columns:
37
+ Required Argument.
38
+ Specifies the name of the input columns in "data" for oversampling.
39
+ Types: str OR list of Strings (str)
40
+
41
+ categorical_columns:
42
+ Optional Argument, Required when "sampling_strategy" is set to 'smotenc' algorithm.
43
+ Specifies the name of the categorical columns in the "data" that
44
+ the function uses for oversampling with smotenc.
45
+ Types: str OR list of Strings (str)
46
+
47
+ median_standard_deviation:
48
+ Optional Argument, Required when "sampling_strategy" is set to 'smotenc' algorithm.
49
+ Specifies the median of the standard deviations computed over the
50
+ numerical input columns.
51
+ Types: float
52
+
53
+ minority_class:
54
+ Required Argument.
55
+ Specifies the minority class for which synthetic samples need to be
56
+ generated.
57
+ Note:
58
+ * The label for minority class under response column must be numeric integer.
59
+ Types: str
60
+
61
+ oversampling_factor:
62
+ Optional Argument.
63
+ Specifies the factor for oversampling the minority class.
64
+ Default Value: 5
65
+ Types: float
66
+
67
+ sampling_strategy:
68
+ Optional Argument.
69
+ Specifies the oversampling algorithm to be used to create synthetic samples.
70
+ Default Value: "smote"
71
+ Permitted Values: "smote", "adasyn", "borderline", "smotenc"
72
+ Types: str
73
+
74
+ fill_sampleid:
75
+ Optional Argument.
76
+ Specifies whether to include the id of the original observation used
77
+ to generate each synthetic observation.
78
+ Default Value: True
79
+ Types: bool
80
+
81
+ noninput_columns_value:
82
+ Optional Argument.
83
+ Specifies the value to put in a sample column for columns not
84
+ specified as input columns.
85
+ Default Value: "sample"
86
+ Permitted Values: "sample", "neighbor", "null"
87
+ Types: str
88
+
89
+ n_neighbors:
90
+ Optional Argument.
91
+ Specifies the number of nearest neighbors for choosing the sample to
92
+ be used in oversampling.
93
+ Default Value: 5
94
+ Types: int
95
+
96
+ seed:
97
+ Optional Argument.
98
+ Specifies the random seed the algorithm uses for repeatable results.
99
+ The function uses the seed for random interpolation to generate the
100
+ synthetic sample.
101
+ Types: int
102
+
103
+ **generic_arguments:
104
+ Specifies the generic keyword arguments SQLE functions accept. Below
105
+ are the generic keyword arguments:
106
+ persist:
107
+ Optional Argument.
108
+ Specifies whether to persist the results of the
109
+ function in a table or not. When set to True,
110
+ results are persisted in a table; otherwise,
111
+ results are garbage collected at the end of the
112
+ session.
113
+ Default Value: False
114
+ Types: bool
115
+
116
+ volatile:
117
+ Optional Argument.
118
+ Specifies whether to put the results of the
119
+ function in a volatile table or not. When set to
120
+ True, results are stored in a volatile table,
121
+ otherwise not.
122
+ Default Value: False
123
+ Types: bool
124
+
125
+ Function allows the user to partition, hash, order or local
126
+ order the input data. These generic arguments are available
127
+ for each argument that accepts teradataml DataFrame as
128
+ input and can be accessed as:
129
+ * "<input_data_arg_name>_partition_column" accepts str or
130
+ list of str (Strings)
131
+ * "<input_data_arg_name>_hash_column" accepts str or list
132
+ of str (Strings)
133
+ * "<input_data_arg_name>_order_column" accepts str or list
134
+ of str (Strings)
135
+ * "local_order_<input_data_arg_name>" accepts boolean
136
+ Note:
137
+ These generic arguments are supported by teradataml if
138
+ the underlying SQL Engine function supports, else an
139
+ exception is raised.
140
+
141
+ RETURNS:
142
+ Instance of SMOTE.
143
+ Output teradataml DataFrames can be accessed using attribute
144
+ references, such as SMOTEObj.<attribute_name>.
145
+ Output teradataml DataFrame attribute name is:
146
+ result
147
+
148
+
149
+ RAISES:
150
+ TeradataMlException, TypeError, ValueError
151
+
152
+
153
+ EXAMPLES:
154
+ # Notes:
155
+ # 1. Get the connection to Vantage, before importing the
156
+ # function in user space.
157
+ # 2. User can import the function, if it is available on
158
+ # Vantage user is connected to.
159
+ # 3. To check the list of analytic functions available on
160
+ # Vantage user connected to, use
161
+ # "display_analytic_functions()".
162
+
163
+ # Load the example data.
164
+ load_example_data("dataframe", "iris_test")
165
+ load_example_data("teradataml", "titanic")
166
+
167
+ # Create teradataml DataFrame objects.
168
+ iris_input = DataFrame.from_table("iris_test").iloc[:25]
169
+ titanic_input = DataFrame("titanic").iloc[:50]
170
+
171
+ # Create Encoding DataFrame objects.
172
+ encoded_data = OrdinalEncodingFit(data=titanic_input,
173
+ target_column=['sex','embarked'],
174
+ approach="AUTO")
175
+
176
+ # Check the list of available analytic functions.
177
+ display_analytic_functions()
178
+
179
+ # Import function SMOTE.
180
+ from teradataml import SMOTE
181
+
182
+ # Example 1 : Generate synthetic samples using smote algorithm.
183
+ smote_out = SMOTE(data = iris_input,
184
+ n_neighbors = 5,
185
+ id_column='id',
186
+ minority_class='3',
187
+ response_column='species',
188
+ input_columns =['sepal_length', 'sepal_width', 'petal_length', 'petal_width'],
189
+ oversampling_factor=2,
190
+ sampling_strategy='smote',
191
+ seed=10)
192
+
193
+ # Print the result DataFrame.
194
+ print(smote_out.result)
195
+
196
+ # Example 2 : Generate synthetic samples using smotenc algorithm with categorical columns.
197
+ smote_out2 = SMOTE(data = titanic_input,
198
+ encoding_data = encoded_data.result,
199
+ id_column = 'passenger',
200
+ response_column = 'survived',
201
+ input_columns = ['parch', 'age', 'sibsp'],
202
+ categorical_columns = ['sex', 'embarked'],
203
+ median_standard_deviation = 31.47806044604718,
204
+ minority_class = '1',
205
+ oversampling_factor = 5,
206
+ sampling_strategy = "smotenc",
207
+ noninput_columns_value = "null",
208
+ n_neighbors = 5)
209
+
210
+ # Print the result DataFrame.
211
+ print(smote_out2.result)
212
+ """
@@ -0,0 +1,119 @@
1
+ def TextMorph(data=None, word_column=None, pos=None,
2
+ single_output=False, postag_column=None,
3
+ accumulate=None, **generic_arguments):
4
+ """
5
+ DESCRIPTION:
6
+ TextMorph() function generates morphs of the given words in the input dataset.
7
+
8
+ PARAMETERS:
9
+ data:
10
+ Required Argument.
11
+ Specifies the input teradataml DataFrame.
12
+ Types: teradataml DataFrame
13
+
14
+ word_column:
15
+ Required Argument.
16
+ Specifies the name of the input column that contains words for which morphs are to be generated.
17
+ Types: str
18
+
19
+ pos:
20
+ Optional Argument.
21
+ Specifies the part of speech (POS) to output.
22
+ Permitted Values: "NOUN", "VERB", "ADV", "ADJ"
23
+ Types: str or list of str
24
+
25
+ single_output:
26
+ Optional Argument.
27
+ Specifies whether to output only one morph for each word. When set to False,
28
+ the function outputs all morphs for each word.
29
+ Default Value: False
30
+ Types: bool
31
+
32
+ postag_column:
33
+ Optional Argument.
34
+ Specifies the name of the column in data that contains the part-of-speech (POS)
35
+ tags of the words, output by the function TD_POSTagger.
36
+ Types: str
37
+
38
+ accumulate:
39
+ Optional Argument.
40
+ Specifies the names of the input columns to copy to the output table.
41
+ Types: str or list of str
42
+
43
+ **generic_arguments:
44
+ Optional Argument.
45
+ Specifies the generic keyword arguments SQLE functions accept. Below are the generic
46
+ keyword arguments:
47
+ persist:
48
+ Optional Argument.
49
+ Specifies whether to persist the results of the function in a table or not.
50
+ When set to True, results are persisted in a table; otherwise, results are
51
+ garbage collected at the end of the session.
52
+ Default Value: False
53
+ Types: bool
54
+
55
+ volatile:
56
+ Optional Argument.
57
+ Specifies whether to put the results of the function in a volatile table or not.
58
+ When set to True, results are stored in a volatile table; otherwise not.
59
+ Default Value: False
60
+ Types: bool
61
+
62
+ Function allows the user to partition, hash, order or local order the input
63
+ data. These generic arguments are available for each argument that accepts
64
+ teradataml DataFrame as input and can be accessed as:
65
+ * "<input_data_arg_name>_partition_column" accepts str or list of str (Strings)
66
+ * "<input_data_arg_name>_hash_column" accepts str or list of str (Strings)
67
+ * "<input_data_arg_name>_order_column" accepts str or list of str (Strings)
68
+ * "local_order_<input_data_arg_name>" accepts boolean
69
+ Note:
70
+ These generic arguments are supported by teradataml if the underlying SQLE Engine
71
+ function supports, else an exception is raised.
72
+
73
+ RETURNS:
74
+ Instance of TextMorph.
75
+ Output teradataml DataFrames can be accessed using attribute references, such as TDTextMorphObj.<attribute_name>.
76
+ Output teradataml DataFrame attribute name is:
77
+ result
78
+
79
+ RAISES:
80
+ TeradataMlException, TypeError, ValueError
81
+
82
+ EXAMPLES:
83
+ # Notes:
84
+ # 1. Get the connection to Vantage to execute the function.
85
+ # 2. One must import the required functions mentioned in the example from teradataml.
86
+ # 3. Function will raise an error if it is not supported on the Vantage system the user is connected to.
87
+
88
+ # Load the example data.
89
+ load_example_data("textmorph", ["words_input","pos_input"])
90
+
91
+ # Create teradataml DataFrame objects.
92
+ data1 = DataFrame.from_table("words_input")
93
+ data2 = DataFrame.from_table("pos_input")
94
+
95
+ # Check the list of available analytic functions.
96
+ display_analytic_functions()
97
+
98
+ # Import function TextMorph.
99
+ from teradataml import TextMorph
100
+
101
+ # Example 1: Generate morphs for words in the input dataset.
102
+ TextMorph_out = TextMorph(data=data1,
103
+ word_column="data2",
104
+ pos=["noun", "verb"],
105
+ single_output=True,
106
+ accumulate=["id"])
107
+
108
+ # Print the result DataFrame.
109
+ print(TextMorph_out.result)
110
+
111
+ # Example 2: Generate morphs for words in the input dataset with POS tags.
112
+ TextMorph_pos = TextMorph(data=data2,
113
+ word_column="word",
114
+ postag_column="pos_tag",
115
+ accumulate=["id","pos_tag"])
116
+
117
+ # Print the result DataFrame.
118
+ print(TextMorph_pos.result)
119
+ """