teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of teradataml might be problematic.

Files changed (126)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +315 -2
  3. teradataml/__init__.py +4 -0
  4. teradataml/_version.py +1 -1
  5. teradataml/analytics/analytic_function_executor.py +95 -8
  6. teradataml/analytics/byom/__init__.py +1 -1
  7. teradataml/analytics/json_parser/metadata.py +12 -3
  8. teradataml/analytics/json_parser/utils.py +7 -2
  9. teradataml/analytics/sqle/__init__.py +5 -1
  10. teradataml/analytics/table_operator/__init__.py +1 -1
  11. teradataml/analytics/uaf/__init__.py +1 -1
  12. teradataml/analytics/utils.py +4 -0
  13. teradataml/analytics/valib.py +18 -4
  14. teradataml/automl/__init__.py +51 -6
  15. teradataml/automl/data_preparation.py +59 -35
  16. teradataml/automl/data_transformation.py +58 -33
  17. teradataml/automl/feature_engineering.py +27 -12
  18. teradataml/automl/model_training.py +73 -46
  19. teradataml/common/constants.py +88 -29
  20. teradataml/common/garbagecollector.py +2 -1
  21. teradataml/common/messagecodes.py +19 -3
  22. teradataml/common/messages.py +6 -1
  23. teradataml/common/sqlbundle.py +64 -12
  24. teradataml/common/utils.py +246 -47
  25. teradataml/common/warnings.py +11 -0
  26. teradataml/context/context.py +161 -27
  27. teradataml/data/amazon_reviews_25.csv +26 -0
  28. teradataml/data/byom_example.json +11 -0
  29. teradataml/data/dataframe_example.json +18 -2
  30. teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
  31. teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
  32. teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
  33. teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
  34. teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
  35. teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
  36. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
  37. teradataml/data/docs/sqle/docs_17_20/Shap.py +34 -6
  38. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
  39. teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
  40. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  41. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  42. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
  43. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
  44. teradataml/data/hnsw_alter_data.csv +5 -0
  45. teradataml/data/hnsw_data.csv +10 -0
  46. teradataml/data/jsons/byom/h2opredict.json +1 -1
  47. teradataml/data/jsons/byom/onnxembeddings.json +266 -0
  48. teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
  49. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
  50. teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
  51. teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
  52. teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
  53. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  54. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  55. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  56. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  57. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  58. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
  59. teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
  60. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
  61. teradataml/data/medical_readings.csv +101 -0
  62. teradataml/data/patient_profile.csv +101 -0
  63. teradataml/data/scripts/lightgbm/dataset.template +157 -0
  64. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
  65. teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
  66. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
  67. teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
  68. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
  69. teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
  70. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
  71. teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
  72. teradataml/data/target_udt_data.csv +8 -0
  73. teradataml/data/templates/open_source_ml.json +3 -2
  74. teradataml/data/teradataml_example.json +8 -0
  75. teradataml/data/vectordistance_example.json +4 -0
  76. teradataml/dataframe/copy_to.py +8 -3
  77. teradataml/dataframe/data_transfer.py +11 -1
  78. teradataml/dataframe/dataframe.py +1049 -285
  79. teradataml/dataframe/dataframe_utils.py +152 -20
  80. teradataml/dataframe/functions.py +578 -35
  81. teradataml/dataframe/setop.py +11 -6
  82. teradataml/dataframe/sql.py +185 -16
  83. teradataml/dbutils/dbutils.py +1049 -115
  84. teradataml/dbutils/filemgr.py +48 -1
  85. teradataml/hyperparameter_tuner/optimizer.py +12 -1
  86. teradataml/lib/aed_0_1.dll +0 -0
  87. teradataml/opensource/__init__.py +1 -1
  88. teradataml/opensource/_base.py +1466 -0
  89. teradataml/opensource/_class.py +464 -0
  90. teradataml/opensource/{sklearn/constants.py → _constants.py} +21 -14
  91. teradataml/opensource/_lightgbm.py +949 -0
  92. teradataml/opensource/_sklearn.py +1008 -0
  93. teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +5 -6
  94. teradataml/options/__init__.py +54 -38
  95. teradataml/options/configure.py +131 -27
  96. teradataml/options/display.py +13 -2
  97. teradataml/plot/axis.py +47 -8
  98. teradataml/plot/figure.py +33 -0
  99. teradataml/plot/plot.py +63 -13
  100. teradataml/scriptmgmt/UserEnv.py +5 -5
  101. teradataml/scriptmgmt/lls_utils.py +130 -40
  102. teradataml/store/__init__.py +12 -0
  103. teradataml/store/feature_store/__init__.py +0 -0
  104. teradataml/store/feature_store/constants.py +291 -0
  105. teradataml/store/feature_store/feature_store.py +2318 -0
  106. teradataml/store/feature_store/models.py +1505 -0
  107. teradataml/table_operators/Apply.py +32 -18
  108. teradataml/table_operators/Script.py +3 -1
  109. teradataml/table_operators/TableOperator.py +3 -1
  110. teradataml/table_operators/query_generator.py +3 -0
  111. teradataml/table_operators/table_operator_query_generator.py +3 -1
  112. teradataml/table_operators/table_operator_util.py +37 -38
  113. teradataml/table_operators/templates/dataframe_register.template +69 -0
  114. teradataml/utils/dtypes.py +51 -2
  115. teradataml/utils/internal_buffer.py +18 -0
  116. teradataml/utils/validators.py +99 -8
  117. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/METADATA +321 -5
  118. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/RECORD +121 -94
  119. teradataml/libaed_0_1.dylib +0 -0
  120. teradataml/libaed_0_1.so +0 -0
  121. teradataml/opensource/sklearn/__init__.py +0 -1
  122. teradataml/opensource/sklearn/_class.py +0 -255
  123. teradataml/opensource/sklearn/_sklearn_wrapper.py +0 -1800
  124. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/WHEEL +0 -0
  125. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/top_level.txt +0 -0
  126. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/zip-safe +0 -0
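
Among the changes above, items 63-66 and 88-92 replace the sklearn-only `teradataml/opensource/sklearn` package with a generalized `_base.py`/`_class.py` layer and add a lightgbm backend (`_lightgbm.py` plus the new templates). A minimal sketch of how the new interface is likely used, assuming the `td_lightgbm` entry point mirrors lightgbm's native Dataset/train API the way the existing `td_sklearn` interface mirrors scikit-learn; the table and column names are hypothetical and the exact signatures are not confirmed by this diff:

```python
# Hedged sketch of the new opensource lightgbm interface; signatures are
# assumptions based on the td_sklearn pattern, not confirmed by this diff.
from teradataml import create_context, DataFrame, td_lightgbm

create_context(host="<host>", username="<user>", password="<password>")

# Hypothetical training table with feature and target columns.
df = DataFrame("housing_train")
df_x = df.select(["lotsize", "bedrooms", "bathrms"])
df_y = df.select(["price"])

# Dataset/train mirror lightgbm's native API, with execution pushed to Vantage.
dtrain = td_lightgbm.Dataset(df_x, df_y, free_raw_data=False)
model = td_lightgbm.train(params={"objective": "regression"},
                          train_set=dtrain,
                          num_boost_round=30)
print(model.predict(df_x))
```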
@@ -0,0 +1,242 @@
+ def ONNXEmbeddings(newdata=None, modeldata=None, tokenizerdata=None, accumulate=None, model_output_tensor=None,
+                    encode_max_length=512, show_model_properties=False, output_column_prefix="emb_",
+                    output_format="VARBYTE(3072)", overwrite_cached_models="false", is_debug=False,
+                    enable_memory_check=False, **generic_arguments):
+     """
+     DESCRIPTION:
+         The ONNXEmbeddings() function calculates embedding values in
+         Vantage with a HuggingFace model that has been created outside
+         Vantage and exported to Vantage in ONNX format.
+
+     PARAMETERS:
+         newdata:
+             Required Argument.
+             Specifies the input teradataml DataFrame that contains
+             the data to be scored.
+             Types: teradataml DataFrame
+
+         modeldata:
+             Required Argument.
+             Specifies the model teradataml DataFrame to be used for
+             scoring.
+             Note:
+                 * Use retrieve_byom() to get the teradataml DataFrame that contains the model.
+             Types: teradataml DataFrame
+
+         tokenizerdata:
+             Required Argument.
+             Specifies the tokenizer teradataml DataFrame
+             which contains the tokenizer JSON file.
+             Types: teradataml DataFrame
+
+         accumulate:
+             Required Argument.
+             Specifies the name(s) of input teradataml DataFrame column(s) to
+             copy to the output. By default, the function copies all input
+             teradataml DataFrame columns to the output.
+             Types: str OR list of Strings (str) OR Feature OR list of Features
+
+         model_output_tensor:
+             Required Argument.
+             Specifies which of the model's possible output fields
+             to calculate and output.
+             Types: str
+
+         encode_max_length:
+             Optional Argument.
+             Specifies the maximum length of the tokenizer output token
+             encodings (only applies to models with symbolic dimensions).
+             Default Value: 512
+             Types: int
+
+         show_model_properties:
+             Optional Argument.
+             Specifies whether to show the model properties, that is, the
+             default or expanded "model_input_fields_map" for the input model.
+             Default Value: False
+             Types: bool
+
+         output_column_prefix:
+             Optional Argument.
+             Specifies the column prefix for each of the output columns
+             when using the float32 "output_format".
+             Default Value: "emb_"
+             Types: str
+
+         output_format:
+             Optional Argument.
+             Specifies the output format for the model embeddings output.
+             Default Value: "VARBYTE(3072)"
+             Types: str
+
+         overwrite_cached_models:
+             Optional Argument.
+             Specifies the model name that needs to be removed from the cache.
+             When a model loaded into the memory of the node fits in the cache,
+             it stays in the cache until being evicted to make space for another
+             model that needs to be loaded. Therefore, a model can remain in the
+             cache even after the completion of function execution. Other functions
+             that use the same model can use it, saving the cost of reloading it
+             into memory. User should overwrite a cached model only when it is updated,
+             to make sure that the Predict function uses the updated model instead
+             of the cached model.
+             Note:
+                 Do not use the "overwrite_cached_models" argument except when user
+                 is trying to replace a previously cached model. Using the argument
+                 in other cases, including in concurrent queries or multiple times
+                 within a short period of time, can lead to an OOM error.
+             Default Value: "false"
+             Permitted Values: true, t, yes, y, 1, false, f, no, n, 0, *,
+                               current_cached_model
+             Types: str
+
+         is_debug:
+             Optional Argument.
+             Specifies whether debug statements are added to a trace table or not.
+             When set to True, debug statements are added to a trace table that must
+             be created beforehand.
+             Notes:
+                 * Only available with BYOM version 3.00.00.02 and later.
+                 * To save logs for debugging, user can create an error log by using
+                   the is_debug=True parameter in the predict functions.
+                   A database trace table is used to collect this information which
+                   does impact performance of the function, so using small data input
+                   sizes is recommended.
+                 * To generate this log, user must do the following:
+                   1. Create a global trace table with columns vproc_ID BYTE(2),
+                      Sequence INTEGER, Trace_Output VARCHAR(31000).
+                   2. Turn on session function tracing:
+                      SET SESSION FUNCTION TRACE USING '' FOR TABLE <trace_table_name_created_in_step_1>;
+                   3. Execute function with "is_debug" set to True.
+                   4. Debug information is logged to the table created in step 1.
+                   5. To turn off the logging, either disconnect from the session or
+                      run the following SQL:
+                      SET SESSION FUNCTION TRACE OFF;
+                   The trace table is temporary and the information is deleted if user
+                   logs off from the session. If long term persistence is necessary,
+                   user can copy the table to a permanent table before leaving the
+                   session.
+             Default Value: False
+             Types: bool
+
+         enable_memory_check:
+             Optional Argument.
+             Specifies whether to check that there is enough native memory for
+             large models.
+             Default Value: False
+             Types: bool
+
+         **generic_arguments:
+             Specifies the generic keyword arguments SQLE functions accept. Below
+             are the generic keyword arguments:
+             persist:
+                 Optional Argument.
+                 Specifies whether to persist the results of the
+                 function in a table or not. When set to True,
+                 results are persisted in a table; otherwise,
+                 results are garbage collected at the end of the
+                 session.
+                 Default Value: False
+                 Types: bool
+
+             volatile:
+                 Optional Argument.
+                 Specifies whether to put the results of the
+                 function in a volatile table or not. When set to
+                 True, results are stored in a volatile table,
+                 otherwise not.
+                 Default Value: False
+                 Types: bool
+
+             Function allows the user to partition, hash, order or local
+             order the input data. These generic arguments are available
+             for each argument that accepts teradataml DataFrame as
+             input and can be accessed as:
+                 * "<input_data_arg_name>_partition_column" accepts str or
+                   list of str (Strings) or PartitionKind
+                 * "<input_data_arg_name>_hash_column" accepts str or list
+                   of str (Strings)
+                 * "<input_data_arg_name>_order_column" accepts str or list
+                   of str (Strings)
+                 * "local_order_<input_data_arg_name>" accepts boolean
+             Note:
+                 These generic arguments are supported by teradataml if
+                 the underlying SQL Engine function supports, else an
+                 exception is raised.
+
+     RETURNS:
+         Instance of ONNXEmbeddings.
+         Output teradataml DataFrame can be accessed using attribute
+         references, such as ONNXEmbeddings.<attribute_name>.
+         Output teradataml DataFrame attribute name is:
+             result
+
+     RAISES:
+         TeradataMlException, TypeError, ValueError
+
+     EXAMPLES:
+         # Notes:
+         #     1. Get the connection to Vantage to execute the function.
+         #     2. One must import the required functions mentioned in
+         #        the example from teradataml.
+         #     3. Function will raise error if not supported on the Vantage
+         #        user is connected to.
+         #     4. To execute BYOM functions, set 'configure.byom_install_location' to the
+         #        database name where BYOM functions are installed.
+
+         # Import required libraries / functions.
+         import os, teradataml
+         from teradataml import get_connection, DataFrame
+         from teradataml import save_byom, retrieve_byom, load_example_data
+         from teradataml import configure, display_analytic_functions, execute_sql
+
+         # Load example data.
+         load_example_data("byom", "amazon_reviews_25")
+
+         # Create teradataml DataFrame objects.
+         amazon_reviews_25 = DataFrame.from_table("amazon_reviews_25")
+
+         # Copy the "rev_text" column into a new column named "txt".
+         amazon_reviews_25 = amazon_reviews_25.assign(txt=amazon_reviews_25.rev_text)
+
+         # Set install location of BYOM functions.
+         configure.byom_install_location = "td_mldb"
+
+         # Check the list of available analytic functions.
+         display_analytic_functions(type="BYOM")
+
+         # Retrieve the model and the tokenizer.
+         modeldata = retrieve_byom("bge-small-en-v1.5", table_name="onnx_models")
+         tokenizerdata = retrieve_byom("bge-small-en-v1.5", table_name="embeddings_tokenizers")
+
+         # Create "tokenizer_id" and "tokenizer" columns from "model_id" and
+         # "model" in embeddings_tokenizers.
+         tokenizerdata_a1 = tokenizerdata.assign(tokenizer_id=tokenizerdata.model_id)
+         tokenizerdata_a2 = tokenizerdata_a1.assign(tokenizer=tokenizerdata_a1.model)
+
+         # Example 1: Calculate embedding values in Vantage with a
+         #            bge-small-en-v1.5 model that has been created outside
+         #            Vantage.
+         ONNXEmbeddings_out_1 = ONNXEmbeddings(modeldata=modeldata,
+                                               tokenizerdata=tokenizerdata_a2.select(['tokenizer_id', 'tokenizer']),
+                                               newdata=amazon_reviews_25.select(["rev_id", "txt"]),
+                                               accumulate='rev_id',
+                                               model_output_tensor='sentence_embedding'
+                                               )
+
+         # Print the results.
+         print(ONNXEmbeddings_out_1.result)
+
+         # Example 2: Show the model properties of the bge-small-en-v1.5 model
+         #            that has been created outside Vantage.
+         ONNXEmbeddings_out_2 = ONNXEmbeddings(modeldata=modeldata,
+                                               tokenizerdata=tokenizerdata_a2.select(['tokenizer_id', 'tokenizer']),
+                                               newdata=amazon_reviews_25.select(["rev_id", "txt"]),
+                                               accumulate='rev_id',
+                                               model_output_tensor='sentence_embedding',
+                                               show_model_properties=True
+                                               )
+
+         # Print the results.
+         print(ONNXEmbeddings_out_2.result)
+     """
@@ -40,7 +40,7 @@ def ONNXPredict(newdata=None, modeldata=None, accumulate=None, model_output_fiel
          Required Argument.
          Specifies the name(s) of input teradataml DataFrame column(s) to
          copy to the output.
-         Types: str OR list of Strings (str)
+         Types: str OR list of Strings (str) OR Feature OR list of Features
 
      model_output_fields:
          Optional Argument.
@@ -146,7 +146,7 @@ def ONNXPredict(newdata=None, modeldata=None, accumulate=None, model_output_fiel
          for each argument that accepts teradataml DataFrame as
          input and can be accessed as:
              * "<input_data_arg_name>_partition_column" accepts str or
-               list of str (Strings)
+               list of str (Strings) or PartitionKind
              * "<input_data_arg_name>_hash_column" accepts str or list
                of str (Strings)
              * "<input_data_arg_name>_order_column" accepts str or list
@@ -52,7 +52,7 @@ def PMMLPredict(newdata=None, modeldata=None, accumulate=None,
          Required Argument.
          Specifies the name(s) of input teradataml DataFrame column(s)
          to copy to the output DataFrame.
-         Types: str OR list of Strings (str)
+         Types: str OR list of Strings (str) OR Feature OR list of Features
 
      model_output_fields:
          Optional Argument.
@@ -143,7 +143,7 @@ def PMMLPredict(newdata=None, modeldata=None, accumulate=None,
          for each argument that accepts teradataml DataFrame as
          input and can be accessed as:
              * "<input_data_arg_name>_partition_column" accepts str or
-               list of str (Strings)
+               list of str (Strings) or PartitionKind
              * "<input_data_arg_name>_hash_column" accepts str or list
                of str (Strings)
              * "<input_data_arg_name>_order_column" accepts str or list
@@ -131,7 +131,7 @@ def NaiveBayes(data = None, response_column = None, numeric_inputs = None,
      display_analytic_functions()
 
      # Import function NaiveBayes.
-     from teradataml import NaiveBayes
+     from teradataml import NaiveBayes, Unpivoting
 
      # Example 1: NaiveBayes function to generate classification model using Dense input.
      NaiveBayes_out = NaiveBayes(data=housing_train, response_column='homestyle',
@@ -1,4 +1,4 @@
- def Shap(data = None, object = None, training_function = "TD_GLM",
+ def Shap(data = None, object = None, id_column=None, training_function = None,
           model_type = "Regression", input_columns = None, detailed = False,
           accumulate = None, num_parallel_trees = 1000, num_boost_rounds = 10,
           **generic_arguments):
@@ -19,11 +19,16 @@ def Shap(data = None, object = None, training_function = "TD_GLM",
          Required Argument.
          Specifies the teradataml DataFrame containing the model data.
          Types: teradataml DataFrame
+
+     id_column:
+         Required Argument.
+         Specifies the input data column name that has the unique identifier
+         for each row in the "data".
+         Types: str
 
      training_function:
          Required Argument.
          Specifies the model type name.
-         Default Value: "TD_GLM"
          Permitted Values: TD_GLM, TD_DECISIONFOREST, TD_XGBOOST
          Types: str
 
@@ -44,6 +49,9 @@ def Shap(data = None, object = None, training_function = "TD_GLM",
          Optional Argument.
          Specifies whether to output detailed shap information about the
          forest trees.
+         Note:
+             * It is only supported for "TD_XGBOOST" and "TD_DECISIONFOREST"
+               training functions.
          Default Value: False
          Types: bool
 
@@ -145,10 +153,10 @@ def Shap(data = None, object = None, training_function = "TD_GLM",
 
      # Example 1: Shap for classification model.
      XGBoost_out = XGBoost(data=iris_input,
-                    input_columns=['sepal_length', 'sepal_width', 'petal_length', 'petal_width'],
-                    response_column = 'species',
-                    model_type='Classification',
-                    iter_num=25)
+                           input_columns=['sepal_length', 'sepal_width', 'petal_length', 'petal_width'],
+                           response_column = 'species',
+                           model_type='Classification',
+                           iter_num=25)
 
      Shap_out = Shap(data=iris_input,
                      object=XGBoost_out.result,
@@ -194,4 +202,24 @@ def Shap(data = None, object = None, training_function = "TD_GLM",
 
      # Print the result DataFrame.
      print(Shap_out2.output_data)
+
+     # Example 3: Shap for GLM model.
+     from teradataml import GLM
+     GLM_out = GLM(data=transform_obj.result,
+                   input_columns=['MedInc', 'HouseAge', 'AveRooms',
+                                  'AveBedrms', 'Population', 'AveOccup',
+                                  'Latitude', 'Longitude'],
+                   response_column="MedHouseVal",
+                   family="GAUSSIAN")
+
+     Shap_out3 = Shap(data=transform_obj.result,
+                      object=GLM_out.result,
+                      id_column='id',
+                      training_function="TD_GLM",
+                      model_type="Regression",
+                      input_columns=['MedInc', 'HouseAge', 'AveRooms', 'AveBedrms',
+                                     'Population', 'AveOccup', 'Latitude', 'Longitude'],
+                      detailed=False)
+
+     # Print the result DataFrame.
+     print(Shap_out3.output_data)
      """
@@ -133,10 +133,10 @@ def TDNaiveBayesPredict(data = None, object = None, id_column = None,
      # Check the list of available analytic functions.
      display_analytic_functions()
 
-     # Import function NaiveBayesPredict.
-     from teradataml import NaiveBayesPredict
+     # Import function TDNaiveBayesPredict.
+     from teradataml import TDNaiveBayesPredict, NaiveBayes, Unpivoting
 
-     # Example 1: NaiveBayesPredict function to predict the classification label using Dense input.
+     # Example 1: TDNaiveBayesPredict function to predict the classification label using Dense input.
      NaiveBayes_out = NaiveBayes(data=housing_train, response_column='homestyle',
                                  numeric_inputs=['price','lotsize','bedrooms','bathrms','stories','garagepl'],
                                  categorical_inputs=['driveway','recroom','fullbase','gashw','airco','prefarea'])
@@ -152,7 +152,7 @@ def TDNaiveBayesPredict(data = None, object = None, id_column = None,
      # Print the result DataFrame.
      print(NaiveBayesPredict_out.result)
 
-     # Example 2: NaiveBayesPredict function to predict the classification label using Sparse input.
+     # Example 2: TDNaiveBayesPredict function to predict the classification label using Sparse input.
 
      # Unpivoting the data for sparse input to naive bayes.
      upvt_train = Unpivoting(data = housing_train, id_column = 'sn',
@@ -1,4 +1,4 @@
- def TextParser(data=None, object=None, text_column=None, covert_to_lowercase=True, stem_tokens=False,
+ def TextParser(data=None, object=None, text_column=None, convert_to_lowercase=True, stem_tokens=False,
                 remove_stopwords=False, accumulate=None, delimiter=" \t\n\f\r",
                 punctuation="!#$%&()*+,-./:;?@\^_`{|}~", token_col_name=None, **generic_arguments):
      """
@@ -38,7 +38,7 @@ def TextParser(data=None, object=None, text_column=None, covert_to_lowercase=Tru
          Specifies the name of the input data column whose contents are to be tokenized.
          Types: str
 
-     covert_to_lowercase:
+     convert_to_lowercase:
          Optional Argument.
          Specifies whether to convert the text in "text_column" to lowercase.
          Default Value: True
@@ -165,7 +165,7 @@ def TextParser(data=None, object=None, text_column=None, covert_to_lowercase=Tru
      # Example 2 : Convert words in "text_data" column into their root forms.
      TextParser_out = TextParser(data=complaints,
                                  text_column="text_data",
-                                 covert_to_lowercase=True,
+                                 convert_to_lowercase=True,
                                  stem_tokens=True)
 
      # Print the result DataFrame.
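
The covert_to_lowercase to convert_to_lowercase rename fixes a typo in the keyword itself, so callers that passed the old spelling must be updated. A minimal before/after sketch; presumably the old keyword now fails as an unexpected argument:

```python
# 20.0.0.2 (misspelled keyword, no longer accepted):
# TextParser(data=complaints, text_column="text_data",
#            covert_to_lowercase=True, stem_tokens=True)

# 20.0.0.4 (corrected keyword):
TextParser_out = TextParser(data=complaints,
                            text_column="text_data",
                            convert_to_lowercase=True,
                            stem_tokens=True)
```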
@@ -0,0 +1,118 @@
+ def Image2Matrix(data=None,
+                  output='gray',
+                  **generic_arguments):
+     """
+     DESCRIPTION:
+         Image2Matrix() function converts an image to a matrix.
+         It converts JPEG or PNG images to matrices whose payload values are
+         the pixel values.
+         Note:
+             * The image size cannot be greater than 16 MB.
+             * The image should not exceed 4,000,000 pixels.
+
+     PARAMETERS:
+         data:
+             Required Argument.
+             Specifies the teradataml DataFrame which has image details.
+             Types: teradataml DataFrame
+
+         output:
+             Optional Argument.
+             Specifies the type of output matrix.
+             Default Value: 'gray'
+             Permitted Values:
+                 'gray': Converts the image to a grayscale matrix.
+                 'rgb': Converts the image to an RGB matrix.
+             Types: str
+
+         **generic_arguments:
+             Specifies the generic keyword arguments SQLE functions accept.
+             Below are the generic keyword arguments:
+             persist:
+                 Optional Argument.
+                 Specifies whether to persist the results of the function in a table or not.
+                 When set to True, results are persisted in a table; otherwise, results
+                 are garbage collected at the end of the session.
+                 Default Value: False
+                 Types: bool
+
+             volatile:
+                 Optional Argument.
+                 Specifies whether to put the results of the function in a volatile table or not.
+                 When set to True, results are stored in a volatile table, otherwise not.
+                 Default Value: False
+                 Types: bool
+
+             Function allows the user to partition, hash, order or local order the input
+             data. These generic arguments are available for each argument that accepts
+             teradataml DataFrame as input and can be accessed as:
+                 * "<input_data_arg_name>_partition_column" accepts str or list of str (Strings)
+                 * "<input_data_arg_name>_hash_column" accepts str or list of str (Strings)
+                 * "<input_data_arg_name>_order_column" accepts str or list of str (Strings)
+                 * "local_order_<input_data_arg_name>" accepts boolean
+             Note:
+                 These generic arguments are supported by teradataml if the underlying
+                 Analytic Database function supports, else an exception is raised.
+
+     RETURNS:
+         Instance of Image2Matrix.
+         Output teradataml DataFrames can be accessed using attribute
+         references, such as Image2Matrix.<attribute_name>.
+         Output teradataml DataFrame attribute name is:
+             result
+
+     RAISES:
+         TeradataMlException, TypeError, ValueError
+
+     EXAMPLES:
+         # Notes:
+         #     1. Get the connection to Vantage, before importing the
+         #        function in user space.
+         #     2. User can import the function, if it is available on
+         #        the Vantage user is connected to.
+         #     3. To check the list of analytic functions available on
+         #        the Vantage user is connected to, use
+         #        "display_analytic_functions()".
+
+         # Check the list of available analytic functions.
+         display_analytic_functions()
+
+         # Import function Image2Matrix and the helpers used below.
+         from teradataml import Image2Matrix, DataFrame, db_drop_table, execute_sql
+         import os, teradataml
+
+         # Drop the image table if it is present.
+         try:
+             db_drop_table('imageTable')
+         except:
+             pass
+
+         # Create a table to store the image data.
+         execute_sql('CREATE TABLE imageTable(id INTEGER, image BLOB);')
+
+         # Load the image data into the fileContent variable.
+         file_dir = os.path.join(os.path.dirname(teradataml.__file__), "data")
+         with open(os.path.join(file_dir, 'peppers.png'), mode='rb') as file:
+             fileContent = file.read()
+
+         # Insert the image data into the table.
+         sql = 'INSERT INTO imageTable VALUES(?, ?);'
+         parameters = (1, fileContent)
+         execute_sql(sql, parameters)
+
+         # Create a DataFrame for the image table.
+         imageTable = DataFrame('imageTable')
+
+         # Example 1: Convert the image to a matrix with gray values.
+         image2matrix = Image2Matrix(data=imageTable.select(['id', 'image']),
+                                     output='gray')
+
+         # Print the result DataFrame.
+         print(image2matrix.result)
+
+         # Example 2: Convert the image to a matrix with rgb values.
+         image2matrix2 = Image2Matrix(data=imageTable.select(['id', 'image']),
+                                      output='rgb')
+
+         # Print the result DataFrame.
+         print(image2matrix2.result)
+     """
@@ -0,0 +1,145 @@
+ def CopyArt(data=None, database_name=None,
+             table_name=None, map_name=None,
+             **generic_arguments):
+     """
+     DESCRIPTION:
+         CopyArt() function creates a copy of an existing analytic result table (ART).
+
+     PARAMETERS:
+         data:
+             Required Argument.
+             Specifies the ART data to be copied.
+             Types: DataFrame
+
+         database_name:
+             Required Argument.
+             Specifies the name of the destination database for the copied ART.
+             Types: str
+
+         table_name:
+             Required Argument.
+             Specifies the name of the destination table for the copied ART.
+             Types: str
+
+         map_name:
+             Optional Argument.
+             Specifies the name of the map for the destination ART.
+             By default, the map of "data" is used.
+             Types: str
+
+         **generic_arguments:
+             Specifies the generic keyword arguments of UAF functions.
+             Below are the generic keyword arguments:
+             persist:
+                 Optional Argument.
+                 Specifies whether to persist the results of the
+                 function in a table or not. When set to True,
+                 results are persisted in a table; otherwise,
+                 results are garbage collected at the end of the
+                 session.
+                 Note that when a UAF function is executed, an
+                 analytic result table (ART) is created.
+                 Default Value: False
+                 Types: bool
+
+             volatile:
+                 Optional Argument.
+                 Specifies whether to put the results of the
+                 function in a volatile ART or not. When set to
+                 True, results are stored in a volatile ART,
+                 otherwise not.
+                 Default Value: False
+                 Types: bool
+
+             output_table_name:
+                 Optional Argument.
+                 Specifies the name of the table to store results.
+                 If not specified, a unique table name is internally
+                 generated.
+                 Types: str
+
+             output_db_name:
+                 Optional Argument.
+                 Specifies the name of the database to create the output
+                 table in. If not specified, the table is created in the
+                 database specified by the user at the time of context
+                 creation or in the configuration parameter. Argument is
+                 ignored if "output_table_name" is not specified.
+                 Types: str
+
+     RETURNS:
+         Instance of CopyArt.
+         Output teradataml DataFrames can be accessed using attribute
+         references, such as obj.<attribute_name>.
+         Output teradataml DataFrame attribute name is:
+             1. result
+
+     RAISES:
+         TeradataMlException, TypeError, ValueError
+
+     EXAMPLES:
+         # Notes:
+         #     1. Get the connection to Vantage, before importing the
+         #        function in user space.
+         #     2. User can import the function, if it is available on
+         #        the Vantage user is connected to.
+         #     3. To check the list of UAF analytic functions available
+         #        on the Vantage user is connected to, use
+         #        "display_analytic_functions()".
+
+         # Check the list of available UAF analytic functions.
+         display_analytic_functions(type="UAF")
+
+         # Import function CopyArt and the helpers used below.
+         from teradataml import CopyArt, AutoArima, DataFrame, TDSeries, load_example_data
+
+         # Load the example data.
+         load_example_data("uaf", ["blood2ageandweight"])
+
+         # Create teradataml DataFrame object.
+         data = DataFrame.from_table("blood2ageandweight")
+
+         # Create teradataml TDSeries object.
+         data_series_df = TDSeries(data=data,
+                                   id="PatientID",
+                                   row_index="SeqNo",
+                                   row_index_style="SEQUENCE",
+                                   payload_field="BloodFat",
+                                   payload_content="REAL")
+
+         # Execute AutoArima function to create an ART.
+         uaf_out = AutoArima(data=data_series_df,
+                             start_pq_nonseasonal=[1, 1],
+                             seasonal=False,
+                             constant=True,
+                             algorithm="MLE",
+                             fit_percentage=80,
+                             stepwise=True,
+                             nmodels=7,
+                             fit_metrics=True,
+                             residuals=True)
+
+         # Example 1: Execute CopyArt function to copy the ART to a destination
+         #            table with the persist option.
+         res = CopyArt(data=uaf_out.result,
+                       database_name="alice",
+                       table_name="copied_table",
+                       persist=True)
+         print(res.result)
+
+         # Example 2: Execute CopyArt function to copy the ART to a destination table.
+         res = CopyArt(data=uaf_out.result,
+                       database_name="alice",
+                       table_name="copied_table2")
+
+         # Print the result DataFrame.
+         print(res.result)
+
+         # Example 3: Copy the ART to a destination table using the UAF object.
+         res = uaf_out.copy(database_name="alice",
+                            table_name="copied_table3")
+
+         # Print the result DataFrame.
+         print(res.result)
+     """