teradataml 20.0.0.3__py3-none-any.whl → 20.0.0.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of teradataml might be problematic.

Files changed (151)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +193 -1
  3. teradataml/__init__.py +2 -1
  4. teradataml/_version.py +2 -2
  5. teradataml/analytics/analytic_function_executor.py +25 -18
  6. teradataml/analytics/byom/__init__.py +1 -1
  7. teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
  8. teradataml/analytics/sqle/__init__.py +20 -2
  9. teradataml/analytics/utils.py +15 -1
  10. teradataml/analytics/valib.py +18 -4
  11. teradataml/automl/__init__.py +341 -112
  12. teradataml/automl/autodataprep/__init__.py +471 -0
  13. teradataml/automl/data_preparation.py +84 -42
  14. teradataml/automl/data_transformation.py +69 -33
  15. teradataml/automl/feature_engineering.py +76 -9
  16. teradataml/automl/feature_exploration.py +639 -25
  17. teradataml/automl/model_training.py +35 -14
  18. teradataml/clients/auth_client.py +2 -2
  19. teradataml/common/__init__.py +1 -2
  20. teradataml/common/constants.py +122 -63
  21. teradataml/common/messagecodes.py +14 -3
  22. teradataml/common/messages.py +8 -4
  23. teradataml/common/sqlbundle.py +40 -10
  24. teradataml/common/utils.py +366 -74
  25. teradataml/common/warnings.py +11 -0
  26. teradataml/context/context.py +348 -86
  27. teradataml/data/amazon_reviews_25.csv +26 -0
  28. teradataml/data/apriori_example.json +22 -0
  29. teradataml/data/byom_example.json +11 -0
  30. teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
  31. teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
  32. teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
  33. teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
  34. teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
  35. teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
  36. teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
  37. teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
  38. teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
  39. teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
  40. teradataml/data/docs/sqle/docs_17_20/Shap.py +28 -6
  41. teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
  42. teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
  43. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
  44. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
  45. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
  46. teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
  47. teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
  48. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
  49. teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
  50. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
  51. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
  52. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
  53. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
  54. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
  55. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  56. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
  57. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
  58. teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
  59. teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
  60. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  61. teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
  62. teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
  63. teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
  64. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  65. teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
  66. teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
  67. teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
  68. teradataml/data/hnsw_alter_data.csv +5 -0
  69. teradataml/data/hnsw_data.csv +10 -0
  70. teradataml/data/jsons/byom/h2opredict.json +1 -1
  71. teradataml/data/jsons/byom/onnxembeddings.json +266 -0
  72. teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
  73. teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
  74. teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
  75. teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
  76. teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
  77. teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
  78. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
  79. teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
  80. teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
  81. teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
  82. teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
  83. teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
  84. teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
  85. teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
  86. teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
  87. teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
  88. teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
  89. teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
  90. teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
  91. teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
  92. teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
  93. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +2 -2
  94. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +3 -3
  95. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +6 -6
  96. teradataml/data/ner_dict.csv +8 -0
  97. teradataml/data/ner_input_eng.csv +7 -0
  98. teradataml/data/ner_rule.csv +5 -0
  99. teradataml/data/pos_input.csv +40 -0
  100. teradataml/data/tdnerextractor_example.json +14 -0
  101. teradataml/data/teradataml_example.json +21 -0
  102. teradataml/data/textmorph_example.json +5 -0
  103. teradataml/data/to_num_data.csv +4 -0
  104. teradataml/data/tochar_data.csv +5 -0
  105. teradataml/data/trans_dense.csv +16 -0
  106. teradataml/data/trans_sparse.csv +55 -0
  107. teradataml/data/vectordistance_example.json +1 -1
  108. teradataml/dataframe/copy_to.py +45 -29
  109. teradataml/dataframe/data_transfer.py +72 -46
  110. teradataml/dataframe/dataframe.py +642 -166
  111. teradataml/dataframe/dataframe_utils.py +167 -22
  112. teradataml/dataframe/functions.py +135 -20
  113. teradataml/dataframe/setop.py +11 -6
  114. teradataml/dataframe/sql.py +330 -78
  115. teradataml/dbutils/dbutils.py +556 -140
  116. teradataml/dbutils/filemgr.py +14 -10
  117. teradataml/hyperparameter_tuner/optimizer.py +12 -1
  118. teradataml/lib/aed_0_1.dll +0 -0
  119. teradataml/opensource/{sklearn/_sklearn_wrapper.py → _base.py} +168 -1013
  120. teradataml/opensource/_class.py +141 -17
  121. teradataml/opensource/{constants.py → _constants.py} +7 -3
  122. teradataml/opensource/_lightgbm.py +52 -53
  123. teradataml/opensource/_sklearn.py +1008 -0
  124. teradataml/opensource/_wrapper_utils.py +5 -5
  125. teradataml/options/__init__.py +47 -15
  126. teradataml/options/configure.py +103 -26
  127. teradataml/options/display.py +13 -2
  128. teradataml/plot/axis.py +47 -8
  129. teradataml/plot/figure.py +33 -0
  130. teradataml/plot/plot.py +63 -13
  131. teradataml/scriptmgmt/UserEnv.py +307 -40
  132. teradataml/scriptmgmt/lls_utils.py +428 -145
  133. teradataml/store/__init__.py +2 -3
  134. teradataml/store/feature_store/feature_store.py +102 -7
  135. teradataml/table_operators/Apply.py +48 -19
  136. teradataml/table_operators/Script.py +23 -2
  137. teradataml/table_operators/TableOperator.py +3 -1
  138. teradataml/table_operators/table_operator_util.py +58 -9
  139. teradataml/utils/dtypes.py +49 -1
  140. teradataml/utils/internal_buffer.py +38 -0
  141. teradataml/utils/validators.py +377 -62
  142. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/METADATA +200 -4
  143. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/RECORD +146 -112
  144. teradataml/data/SQL_Fundamentals.pdf +0 -0
  145. teradataml/libaed_0_1.dylib +0 -0
  146. teradataml/libaed_0_1.so +0 -0
  147. teradataml/opensource/sklearn/__init__.py +0 -0
  148. teradataml/store/vector_store/__init__.py +0 -1586
  149. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/WHEEL +0 -0
  150. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/top_level.txt +0 -0
  151. {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/zip-safe +0 -0
@@ -0,0 +1,121 @@
+ def NERExtractor(data=None, user_defined_data=None, rules_data=None, text_column=None,
+                  input_language="EN", show_context=0, accumulate=None,
+                  **generic_arguments):
+     """
+     DESCRIPTION:
+         NERExtractor() performs Named Entity Recognition (NER) on input text
+         according to user-defined dictionary words or regular expression (regex) patterns.
+
+     PARAMETERS:
+         data:
+             Required Argument.
+             Specifies the input teradataml DataFrame.
+             Types: teradataml DataFrame
+
+         user_defined_data:
+             Required Argument.
+             Specifies the teradataml DataFrame which contains user-defined words and the
+             corresponding entity label.
+             Types: teradataml DataFrame
+
+         rules_data:
+             Required Argument.
+             Specifies the teradataml DataFrame which contains user-defined regex patterns
+             and the corresponding entity label.
+             Types: teradataml DataFrame
+
+         text_column:
+             Required Argument.
+             Specifies the name of the teradataml DataFrame column to be used for the NER search.
+             Types: str
+
+         input_language:
+             Optional Argument.
+             Specifies the language of the input text.
+             Default Value: "EN"
+             Types: str
+
+         show_context:
+             Optional Argument.
+             Specifies the number of words to show before and after the matched entity.
+             If fewer leading or trailing words are available than "show_context", an
+             ellipsis (...) is added. Must be a positive value less than 10.
+             Default Value: 0
+             Types: int
+
+         accumulate:
+             Optional Argument.
+             Specifies the name(s) of input teradataml DataFrame column(s) to copy to the
+             output table.
+             Types: str or list of str
+
+         **generic_arguments:
+             Optional Argument.
+             Specifies the generic keyword arguments SQLE functions accept. Below are the generic
+             keyword arguments:
+                 persist:
+                     Optional Argument.
+                     Specifies whether to persist the results of the function in a table or not.
+                     When set to True, results are persisted in a table; otherwise, results are
+                     garbage collected at the end of the session.
+                     Default Value: False
+                     Types: bool
+
+                 volatile:
+                     Optional Argument.
+                     Specifies whether to put the results of the function in a volatile table or not.
+                     When set to True, results are stored in a volatile table; otherwise not.
+                     Default Value: False
+                     Types: bool
+
+             Function allows the user to partition, hash, order or local order the input
+             data. These generic arguments are available for each argument that accepts a
+             teradataml DataFrame as input and can be accessed as:
+                 * "<input_data_arg_name>_partition_column" accepts str or list of str (Strings)
+                 * "<input_data_arg_name>_hash_column" accepts str or list of str (Strings)
+                 * "<input_data_arg_name>_order_column" accepts str or list of str (Strings)
+                 * "local_order_<input_data_arg_name>" accepts boolean
+             Note:
+                 These generic arguments are supported by teradataml if the underlying SQLE Engine
+                 function supports them; otherwise an exception is raised.
+
+     RETURNS:
+         Instance of NERExtractor.
+         Output teradataml DataFrames can be accessed using attribute references, such as
+         TDNERExtractorObj.<attribute_name>.
+         Output teradataml DataFrame attribute name is:
+             result
+
+     RAISES:
+         TeradataMlException, TypeError, ValueError
+
+     EXAMPLES:
+         # Notes:
+         #     1. Get the connection to Vantage to execute the function.
+         #     2. One must import the required functions mentioned in the example from teradataml.
+         #     3. The function raises an error if it is not supported on the Vantage system
+         #        the user is connected to.
+
+         # Load the example data.
+         load_example_data("tdnerextractor", ["ner_input_eng", "ner_dict", "ner_rule"])
+
+         # Create teradataml DataFrame objects.
+         df = DataFrame.from_table("ner_input_eng")
+         user_defined_words = DataFrame.from_table("ner_dict")
+         rules = DataFrame.from_table("ner_rule")
+
+         # Check the list of available analytic functions.
+         display_analytic_functions()
+
+         # Import function NERExtractor.
+         from teradataml import NERExtractor
+
+         # Example 1: Perform Named Entity Recognition (NER) using rules and dictionary
+         #            with accumulate.
+         NER_out = NERExtractor(data=df,
+                                user_defined_data=user_defined_words,
+                                rules_data=rules,
+                                text_column=["txt"],
+                                input_language="en",
+                                show_context=3,
+                                accumulate=["id"])
+
+         # Print the result DataFrame.
+         print(NER_out.result)
+     """
@@ -33,7 +33,7 @@ def NGramSplitter(data=None, text_column=None, delimiter=" ", grams=None, overla
 
         delimiter:
             Optional Argument.
-            Specifies a character or string that separates words in the input text. The
+            Specifies a character, string, or regular expression that separates words in the input text. The
             default value is the set of all whitespace characters which includes
             the characters for space, tab, newline, carriage return and some
             others.
@@ -66,14 +66,14 @@ def NGramSplitter(data=None, text_column=None, delimiter=" ", grams=None, overla
 
         punctuation:
             Optional Argument.
-            Specifies a string that specifies the punctuation characters for the function
+            Specifies a string or regular expression that specifies the punctuation characters for the function
             to remove before evaluating the input text.
             Default Value: "`~#^&*()-"
             Types: str
 
         reset:
             Optional Argument.
-            Specifies a string that specifies the character or string that ends a sentence.
+            Specifies a string or regular expression that specifies the character or string that ends a sentence.
             At the end of a sentence, the function discards any partial n-grams and searches
             for the next n-gram at the beginning of the next sentence. An n-gram
             cannot span two sentences.
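
Since "delimiter", "punctuation" and "reset" now accept regular expressions, a single call can split on a whole character class instead of one literal character. A minimal sketch, assuming a connected session and a hypothetical DataFrame df with a "text_data" column:

    from teradataml import NGramSplitter

    # Split on runs of whitespace via a regex delimiter and emit
    # unigrams and bigrams from the "text_data" column.
    ngrams_out = NGramSplitter(data=df,
                               text_column="text_data",
                               delimiter="[ \\t\\n]+",  # regex now accepted
                               grams=["1", "2"])
    print(ngrams_out.result)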
@@ -0,0 +1,212 @@
+ def SMOTE(data = None, encoding_data = None, id_column = None,
+           response_column = None, input_columns = None, categorical_columns = None,
+           median_standard_deviation = None, minority_class = None,
+           oversampling_factor = 5, sampling_strategy = "smote",
+           fill_sampleid = True, noninput_columns_value = "sample", n_neighbors = 5,
+           seed = None, **generic_arguments):
+     """
+     DESCRIPTION:
+         SMOTE() function generates data by oversampling a minority class using
+         the smote, adasyn, borderline-2 or smote-nc algorithms.
+
+     PARAMETERS:
+         data:
+             Required Argument.
+             Specifies the input teradataml DataFrame.
+             Types: teradataml DataFrame
+
+         encoding_data:
+             Optional Argument. Required when "sampling_strategy" is set to 'smotenc'.
+             Specifies the teradataml DataFrame containing the ordinal encoding information.
+             Types: teradataml DataFrame
+
+         id_column:
+             Required Argument.
+             Specifies the name of the column in "data" that
+             uniquely identifies a data sample.
+             Types: str
+
+         response_column:
+             Optional Argument.
+             Specifies the name of the column in "data" that contains the
+             numeric value to be used as the response value for a sample.
+             Types: str
+
+         input_columns:
+             Required Argument.
+             Specifies the names of the input columns in "data" for oversampling.
+             Types: str OR list of Strings (str)
+
+         categorical_columns:
+             Optional Argument. Required when "sampling_strategy" is set to 'smotenc'.
+             Specifies the names of the categorical columns in "data" that
+             the function uses for oversampling with smotenc.
+             Types: str OR list of Strings (str)
+
+         median_standard_deviation:
+             Optional Argument. Required when "sampling_strategy" is set to 'smotenc'.
+             Specifies the median of the standard deviations computed over the
+             numerical input columns.
+             Types: float
+
+         minority_class:
+             Required Argument.
+             Specifies the minority class for which synthetic samples need to be
+             generated.
+             Note:
+                 * The label for the minority class under the response column must be a numeric integer.
+             Types: str
+
+         oversampling_factor:
+             Optional Argument.
+             Specifies the factor for oversampling the minority class.
+             Default Value: 5
+             Types: float
+
+         sampling_strategy:
+             Optional Argument.
+             Specifies the oversampling algorithm to be used to create synthetic samples.
+             Default Value: "smote"
+             Permitted Values: "smote", "adasyn", "borderline", "smotenc"
+             Types: str
+
+         fill_sampleid:
+             Optional Argument.
+             Specifies whether to include the id of the original observation used
+             to generate each synthetic observation.
+             Default Value: True
+             Types: bool
+
+         noninput_columns_value:
+             Optional Argument.
+             Specifies the value to put in a sample column for columns not
+             specified as input columns.
+             Default Value: "sample"
+             Permitted Values: "sample", "neighbor", "null"
+             Types: str
+
+         n_neighbors:
+             Optional Argument.
+             Specifies the number of nearest neighbors for choosing the sample to
+             be used in oversampling.
+             Default Value: 5
+             Types: int
+
+         seed:
+             Optional Argument.
+             Specifies the random seed the algorithm uses for repeatable results.
+             The function uses the seed for random interpolation to generate the
+             synthetic samples.
+             Types: int
+
+         **generic_arguments:
+             Specifies the generic keyword arguments SQLE functions accept. Below
+             are the generic keyword arguments:
+                 persist:
+                     Optional Argument.
+                     Specifies whether to persist the results of the
+                     function in a table or not. When set to True,
+                     results are persisted in a table; otherwise,
+                     results are garbage collected at the end of the
+                     session.
+                     Default Value: False
+                     Types: bool
+
+                 volatile:
+                     Optional Argument.
+                     Specifies whether to put the results of the
+                     function in a volatile table or not. When set to
+                     True, results are stored in a volatile table,
+                     otherwise not.
+                     Default Value: False
+                     Types: bool
+
+             Function allows the user to partition, hash, order or local
+             order the input data. These generic arguments are available
+             for each argument that accepts teradataml DataFrame as
+             input and can be accessed as:
+                 * "<input_data_arg_name>_partition_column" accepts str or
+                   list of str (Strings)
+                 * "<input_data_arg_name>_hash_column" accepts str or list
+                   of str (Strings)
+                 * "<input_data_arg_name>_order_column" accepts str or list
+                   of str (Strings)
+                 * "local_order_<input_data_arg_name>" accepts boolean
+             Note:
+                 These generic arguments are supported by teradataml if
+                 the underlying SQL Engine function supports them; otherwise
+                 an exception is raised.
+
+     RETURNS:
+         Instance of SMOTE.
+         Output teradataml DataFrames can be accessed using attribute
+         references, such as SMOTEObj.<attribute_name>.
+         Output teradataml DataFrame attribute name is:
+             result
+
+     RAISES:
+         TeradataMlException, TypeError, ValueError
+
+     EXAMPLES:
+         # Notes:
+         #     1. Get the connection to Vantage before importing the
+         #        function in user space.
+         #     2. User can import the function if it is available on the
+         #        Vantage system the user is connected to.
+         #     3. To check the list of analytic functions available on the
+         #        Vantage system connected to, use
+         #        "display_analytic_functions()".
+
+         # Load the example data.
+         load_example_data("dataframe", "iris_test")
+         load_example_data("teradataml", "titanic")
+
+         # Create teradataml DataFrame objects.
+         iris_input = DataFrame.from_table("iris_test").iloc[:25]
+         titanic_input = DataFrame("titanic").iloc[:50]
+
+         # Create encoding DataFrame objects.
+         encoded_data = OrdinalEncodingFit(data=titanic_input,
+                                           target_column=['sex','embarked'],
+                                           approach="AUTO")
+
+         # Check the list of available analytic functions.
+         display_analytic_functions()
+
+         # Import function SMOTE.
+         from teradataml import SMOTE
+
+         # Example 1: Generate synthetic samples using the smote algorithm.
+         smote_out = SMOTE(data = iris_input,
+                           n_neighbors = 5,
+                           id_column='id',
+                           minority_class='3',
+                           response_column='species',
+                           input_columns =['sepal_length', 'sepal_width', 'petal_length', 'petal_width'],
+                           oversampling_factor=2,
+                           sampling_strategy='smote',
+                           seed=10)
+
+         # Print the result DataFrame.
+         print(smote_out.result)
+
+         # Example 2: Generate synthetic samples using the smotenc algorithm with
+         #            categorical columns.
+         smote_out2 = SMOTE(data = titanic_input,
+                            encoding_data = encoded_data.result,
+                            id_column = 'passenger',
+                            response_column = 'survived',
+                            input_columns = ['parch', 'age', 'sibsp'],
+                            categorical_columns = ['sex', 'embarked'],
+                            median_standard_deviation = 31.47806044604718,
+                            minority_class = '1',
+                            oversampling_factor = 5,
+                            sampling_strategy = "smotenc",
+                            noninput_columns_value = "null",
+                            n_neighbors = 5)
+
+         # Print the result DataFrame.
+         print(smote_out2.result)
+     """
@@ -1,4 +1,4 @@
- def Shap(data = None, object = None, id_column=None, training_function = "TD_GLM",
+ def Shap(data = None, object = None, id_column=None, training_function = None,
           model_type = "Regression", input_columns = None, detailed = False,
           accumulate = None, num_parallel_trees = 1000, num_boost_rounds = 10,
           **generic_arguments):
@@ -29,7 +29,6 @@ def Shap(data = None, object = None, id_column=None, training_function = "TD_GLM
         training_function:
             Required Argument.
             Specifies the model type name.
-            Default Value: "TD_GLM"
             Permitted Values: TD_GLM, TD_DECISIONFOREST, TD_XGBOOST
             Types: str
 
@@ -50,6 +49,9 @@ def Shap(data = None, object = None, id_column=None, training_function = "TD_GLM
             Optional Argument.
             Specifies whether to output detailed shap information about the
             forest trees.
+            Note:
+                * It is only supported for the "TD_XGBOOST" and "TD_DECISIONFOREST"
+                  training functions.
             Default Value: False
             Types: bool
 
@@ -151,10 +153,10 @@ def Shap(data = None, object = None, id_column=None, training_function = "TD_GLM
 
         # Example 1: Shap for classification model.
         XGBoost_out = XGBoost(data=iris_input,
-                          input_columns=['sepal_length', 'sepal_width', 'petal_length', 'petal_width'],
-                          response_column = 'species',
-                          model_type='Classification',
-                          iter_num=25)
+                              input_columns=['sepal_length', 'sepal_width', 'petal_length', 'petal_width'],
+                              response_column = 'species',
+                              model_type='Classification',
+                              iter_num=25)
 
         Shap_out = Shap(data=iris_input,
                         object=XGBoost_out.result,
@@ -200,4 +202,24 @@ def Shap(data = None, object = None, id_column=None, training_function = "TD_GLM
 
         # Print the result DataFrame.
         print(Shap_out2.output_data)
+
+        # Example 3: Shap for GLM model.
+        from teradataml import GLM
+        GLM_out = GLM(data=transform_obj.result,
+                      input_columns=['MedInc', 'HouseAge', 'AveRooms',
+                                     'AveBedrms', 'Population', 'AveOccup',
+                                     'Latitude', 'Longitude'],
+                      response_column="MedHouseVal",
+                      family="GAUSSIAN")
+
+        Shap_out3 = Shap(data=transform_obj.result,
+                         object=GLM_out.result,
+                         id_column='id',
+                         training_function="TD_GLM",
+                         model_type="Regression",
+                         input_columns=['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms',
+                                        'Population', 'AveOccup', 'Latitude', 'Longitude'],
+                         detailed=False)
+
+        # Print the result DataFrame.
+        print(Shap_out3.output_data)
     """
@@ -0,0 +1,119 @@
+ def TextMorph(data=None, word_column=None, pos=None,
+               single_output=False, postag_column=None,
+               accumulate=None, **generic_arguments):
+     """
+     DESCRIPTION:
+         TextMorph() function generates morphs of the given words in the input dataset.
+
+     PARAMETERS:
+         data:
+             Required Argument.
+             Specifies the input teradataml DataFrame.
+             Types: teradataml DataFrame
+
+         word_column:
+             Required Argument.
+             Specifies the name of the input column that contains the words for which
+             morphs are to be generated.
+             Types: str
+
+         pos:
+             Optional Argument.
+             Specifies the part of speech (POS) to output.
+             Permitted Values: "NOUN", "VERB", "ADV", "ADJ"
+             Types: str or list of str
+
+         single_output:
+             Optional Argument.
+             Specifies whether to output only one morph for each word. If set to False,
+             the function outputs all morphs for each word.
+             Default Value: False
+             Types: bool
+
+         postag_column:
+             Optional Argument.
+             Specifies the name of the column in "data" that contains the part-of-speech (POS)
+             tags of the words, as output by the function TD_POSTagger.
+             Types: str
+
+         accumulate:
+             Optional Argument.
+             Specifies the names of the input columns to copy to the output table.
+             Types: str or list of str
+
+         **generic_arguments:
+             Optional Argument.
+             Specifies the generic keyword arguments SQLE functions accept. Below are the generic
+             keyword arguments:
+                 persist:
+                     Optional Argument.
+                     Specifies whether to persist the results of the function in a table or not.
+                     When set to True, results are persisted in a table; otherwise, results are
+                     garbage collected at the end of the session.
+                     Default Value: False
+                     Types: bool
+
+                 volatile:
+                     Optional Argument.
+                     Specifies whether to put the results of the function in a volatile table or not.
+                     When set to True, results are stored in a volatile table; otherwise not.
+                     Default Value: False
+                     Types: bool
+
+             Function allows the user to partition, hash, order or local order the input
+             data. These generic arguments are available for each argument that accepts
+             teradataml DataFrame as input and can be accessed as:
+                 * "<input_data_arg_name>_partition_column" accepts str or list of str (Strings)
+                 * "<input_data_arg_name>_hash_column" accepts str or list of str (Strings)
+                 * "<input_data_arg_name>_order_column" accepts str or list of str (Strings)
+                 * "local_order_<input_data_arg_name>" accepts boolean
+             Note:
+                 These generic arguments are supported by teradataml if the underlying SQLE Engine
+                 function supports them; otherwise an exception is raised.
+
+     RETURNS:
+         Instance of TextMorph.
+         Output teradataml DataFrames can be accessed using attribute references, such as
+         TDTextMorphObj.<attribute_name>.
+         Output teradataml DataFrame attribute name is:
+             result
+
+     RAISES:
+         TeradataMlException, TypeError, ValueError
+
+     EXAMPLES:
+         # Notes:
+         #     1. Get the connection to Vantage to execute the function.
+         #     2. One must import the required functions mentioned in the example from teradataml.
+         #     3. The function raises an error if it is not supported on the Vantage system
+         #        the user is connected to.
+
+         # Load the example data.
+         load_example_data("textmorph", ["words_input", "pos_input"])
+
+         # Create teradataml DataFrame objects.
+         data1 = DataFrame.from_table("words_input")
+         data2 = DataFrame.from_table("pos_input")
+
+         # Check the list of available analytic functions.
+         display_analytic_functions()
+
+         # Import function TextMorph.
+         from teradataml import TextMorph
+
+         # Example 1: Generate morphs for words in the input dataset.
+         TextMorph_out = TextMorph(data=data1,
+                                   word_column="data2",
+                                   pos=["noun", "verb"],
+                                   single_output=True,
+                                   accumulate=["id"])
+
+         # Print the result DataFrame.
+         print(TextMorph_out.result)
+
+         # Example 2: Generate morphs for words in the input dataset with POS tags.
+         TextMorph_pos = TextMorph(data=data2,
+                                   word_column="word",
+                                   postag_column="pos_tag",
+                                   accumulate=["id", "pos_tag"])
+
+         # Print the result DataFrame.
+         print(TextMorph_pos.result)
+     """
@@ -1,6 +1,9 @@
- def TextParser(data=None, object=None, text_column=None, convert_to_lowercase=True, stem_tokens=False,
-                remove_stopwords=False, accumulate=None, delimiter=" \t\n\f\r",
-                punctuation="!#$%&()*+,-./:;?@\^_`{|}~", token_col_name=None, **generic_arguments):
+ def TextParser(data=None, object=None, text_column=None, enforce_token_limit=False,
+                convert_to_lowercase=True, stem_tokens=False, remove_stopwords=False,
+                accumulate=None, delimiter=" \t\n\f\r", delimiter_regex=None,
+                punctuation="!#$%&()*+,-./:;?@\^_`{|}~", token_col_name=None,
+                doc_id_column=None, list_positions=False, token_frequency=False,
+                output_by_word=True, **generic_arguments):
      """
      DESCRIPTION:
          The TextParser() function can parse text and perform the following operations:
@@ -38,6 +41,13 @@ def TextParser(data=None, object=None, text_column=None, convert_to_lowercase=Tr
             Specifies the name of the input data column whose contents are to be tokenized.
             Types: str
 
+        enforce_token_limit:
+            Optional Argument.
+            Specifies whether to raise an informative error on finding a token larger
+            than 64K/32K, or to silently discard such tokens.
+            Default Value: False
+            Types: bool
+
         convert_to_lowercase:
             Optional Argument.
             Specifies whether to convert the text in "text_column" to lowercase.
@@ -71,6 +81,11 @@ def TextParser(data=None, object=None, text_column=None, convert_to_lowercase=Tr
             Default Value: " \\t\\n\\f\\r"
             Types: str
 
+        delimiter_regex:
+            Optional Argument.
+            Specifies a Perl Compatible regular expression that represents the word delimiter.
+            Types: str
+
         punctuation:
             Optional Argument.
             Specifies the punctuation characters to replace with a space in the input text.
@@ -83,6 +98,29 @@ def TextParser(data=None, object=None, text_column=None, convert_to_lowercase=Tr
             the text of the specified column in the "text_column" element.
             Types: str
 
+        doc_id_column:
+            Optional Argument.
+            Specifies the name of the column that uniquely identifies a row in the input table.
+            Types: str
+
+        list_positions:
+            Optional Argument.
+            Specifies whether to output the positions of a word in list form.
+            Default Value: False
+            Types: bool
+
+        token_frequency:
+            Optional Argument.
+            Specifies whether to output the frequency of each token.
+            Default Value: False
+            Types: bool
+
+        output_by_word:
+            Optional Argument.
+            Specifies whether to output each token in a separate row or all tokens in one row.
+            Default Value: True
+            Types: bool
+
         **generic_arguments:
             Specifies the generic keyword arguments SQLE functions accept. Below
             are the generic keyword arguments:
@@ -170,4 +208,17 @@ def TextParser(data=None, object=None, text_column=None, convert_to_lowercase=Tr
 
         # Print the result DataFrame.
         print(TextParser_out.result)
+
+        # Example 3: Tokenize words in the "text_data" column using a delimiter regex,
+        #            convert tokens to lowercase and output token positions in list format.
+        TextParser_out = TextParser(data=complaints,
+                                    text_column="text_data",
+                                    doc_id_column="doc_id",
+                                    delimiter_regex="[ \t\f\r\n]+",
+                                    list_positions=True,
+                                    convert_to_lowercase=True,
+                                    output_by_word=False)
+
+        # Print the result DataFrame.
+        print(TextParser_out.result)
     """
@@ -94,7 +94,7 @@ def ACF(data=None, data_filter_expr=None, max_lags=None,
             Default behavior when "alpha" is omitted or is not a positive
             float:
                 * The function does not return confidence intervals.
-            Types: float
+            Types: int OR float
 
         **generic_arguments:
             Specifies the generic keyword arguments of UAF functions.
@@ -169,7 +169,7 @@ def ArimaEstimate(data1=None, data1_filter_expr=None, data2=None,
             at the end to specify the intercept coefficient initial
             value, then the formula is as follows:
                 p+q+P+Q+constant
-            Types: float, list of float
+            Types: int, list of int, float, list of float
 
         fixed:
             Optional Argument.
@@ -183,7 +183,7 @@ def ArimaEstimate(data1=None, data1_filter_expr=None, data2=None,
             at the end to specify the intercept coefficient initial
             value, then the formula is as follows:
                 p+q+P+Q+constant
-            Types: float, list of float
+            Types: int, list of int, float, list of float
 
         constant:
             Optional Argument.
@@ -95,7 +95,7 @@ def ArimaXEstimate(data1=None, data1_filter_expr=None, data2=None,
             MA coefficients, the seasonal SAR regression
             coefficients and the SMA coefficients. The formula is
             as follows: 'p+q+P+Q+CONSTANT-length-init-list'
-            Types: float, list of float
+            Types: int, list of int, float, list of float
 
         fixed:
             Optional Argument.
@@ -107,7 +107,7 @@ def ArimaXEstimate(data1=None, data1_filter_expr=None, data2=None,
             If an intercept is needed, one more value is added at
             the end to specify the intercept coefficient initial value.
             The formula is as follows: 'p+q+P+Q+CONSTANT-length-fixed-list'
-            Types: float, list of float
+            Types: int, list of int, float, list of float
 
         constant:
             Optional Argument.
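
With "init" and "fixed" widened to accept integers, coefficient seeds no longer need to be spelled as floats. A rough sketch; the TDSeries column names and the parameter names other than "init" are assumptions based on the surrounding UAF docstrings, so check the full signatures before use:

    from teradataml import TDSeries, ArimaEstimate

    # Assumed column names for the series (id/sequence/value).
    series = TDSeries(data=df, id="series_id",
                      row_index="seq", row_index_style="SEQUENCE",
                      payload_field="value", payload_content="REAL")

    # For p=1, q=1 plus an intercept, init has p+q+constant = 3 entries;
    # plain ints are now accepted where floats were required before.
    est = ArimaEstimate(data1=series,
                        nonseasonal_model_order=[1, 0, 1],
                        constant=True,
                        init=[0, 0, 1])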