teradataml 20.0.0.0__py3-none-any.whl → 20.0.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of teradataml might be problematic.

Files changed (263)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +183 -0
  4. teradataml/__init__.py +6 -3
  5. teradataml/_version.py +2 -2
  6. teradataml/analytics/__init__.py +3 -2
  7. teradataml/analytics/analytic_function_executor.py +275 -40
  8. teradataml/analytics/analytic_query_generator.py +92 -0
  9. teradataml/analytics/byom/__init__.py +3 -2
  10. teradataml/analytics/json_parser/metadata.py +1 -0
  11. teradataml/analytics/json_parser/utils.py +17 -21
  12. teradataml/analytics/meta_class.py +40 -1
  13. teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
  14. teradataml/analytics/sqle/__init__.py +10 -2
  15. teradataml/analytics/table_operator/__init__.py +3 -2
  16. teradataml/analytics/uaf/__init__.py +21 -2
  17. teradataml/analytics/utils.py +62 -1
  18. teradataml/analytics/valib.py +1 -1
  19. teradataml/automl/__init__.py +1553 -319
  20. teradataml/automl/custom_json_utils.py +139 -61
  21. teradataml/automl/data_preparation.py +276 -319
  22. teradataml/automl/data_transformation.py +163 -81
  23. teradataml/automl/feature_engineering.py +402 -239
  24. teradataml/automl/feature_exploration.py +9 -2
  25. teradataml/automl/model_evaluation.py +48 -51
  26. teradataml/automl/model_training.py +291 -189
  27. teradataml/catalog/byom.py +8 -8
  28. teradataml/catalog/model_cataloging_utils.py +1 -1
  29. teradataml/clients/auth_client.py +133 -0
  30. teradataml/clients/pkce_client.py +1 -1
  31. teradataml/common/aed_utils.py +3 -2
  32. teradataml/common/constants.py +48 -6
  33. teradataml/common/deprecations.py +13 -7
  34. teradataml/common/garbagecollector.py +156 -120
  35. teradataml/common/messagecodes.py +6 -1
  36. teradataml/common/messages.py +3 -1
  37. teradataml/common/sqlbundle.py +1 -1
  38. teradataml/common/utils.py +103 -11
  39. teradataml/common/wrapper_utils.py +1 -1
  40. teradataml/context/context.py +121 -31
  41. teradataml/data/advertising.csv +201 -0
  42. teradataml/data/bank_marketing.csv +11163 -0
  43. teradataml/data/bike_sharing.csv +732 -0
  44. teradataml/data/boston2cols.csv +721 -0
  45. teradataml/data/breast_cancer.csv +570 -0
  46. teradataml/data/complaints_test_tokenized.csv +353 -0
  47. teradataml/data/complaints_tokens_model.csv +348 -0
  48. teradataml/data/covid_confirm_sd.csv +83 -0
  49. teradataml/data/customer_segmentation_test.csv +2628 -0
  50. teradataml/data/customer_segmentation_train.csv +8069 -0
  51. teradataml/data/dataframe_example.json +10 -0
  52. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingFit.py +3 -1
  53. teradataml/data/docs/sqle/docs_17_10/OneHotEncodingTransform.py +6 -0
  54. teradataml/data/docs/sqle/docs_17_10/OutlierFilterTransform.py +5 -1
  55. teradataml/data/docs/sqle/docs_17_20/ANOVA.py +61 -1
  56. teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
  57. teradataml/data/docs/sqle/docs_17_20/ColumnTransformer.py +2 -0
  58. teradataml/data/docs/sqle/docs_17_20/FTest.py +105 -26
  59. teradataml/data/docs/sqle/docs_17_20/GLM.py +162 -1
  60. teradataml/data/docs/sqle/docs_17_20/GetFutileColumns.py +5 -3
  61. teradataml/data/docs/sqle/docs_17_20/KMeans.py +48 -1
  62. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
  63. teradataml/data/docs/sqle/docs_17_20/NonLinearCombineFit.py +3 -2
  64. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingFit.py +5 -0
  65. teradataml/data/docs/sqle/docs_17_20/OneHotEncodingTransform.py +6 -0
  66. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
  67. teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
  68. teradataml/data/docs/sqle/docs_17_20/ROC.py +3 -2
  69. teradataml/data/docs/sqle/docs_17_20/SVMPredict.py +13 -2
  70. teradataml/data/docs/sqle/docs_17_20/ScaleFit.py +119 -1
  71. teradataml/data/docs/sqle/docs_17_20/ScaleTransform.py +93 -1
  72. teradataml/data/docs/sqle/docs_17_20/Shap.py +197 -0
  73. teradataml/data/docs/sqle/docs_17_20/TDGLMPredict.py +163 -1
  74. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
  75. teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
  76. teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
  77. teradataml/data/docs/sqle/docs_17_20/XGBoost.py +12 -4
  78. teradataml/data/docs/sqle/docs_17_20/XGBoostPredict.py +7 -1
  79. teradataml/data/docs/sqle/docs_17_20/ZTest.py +72 -7
  80. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
  81. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
  82. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
  83. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
  84. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
  85. teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
  86. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
  87. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
  88. teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
  89. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
  90. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
  91. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
  92. teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
  93. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
  94. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
  95. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
  96. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
  97. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
  98. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
  99. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
  100. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
  101. teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
  102. teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
  103. teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
  104. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  105. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
  106. teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
  107. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
  108. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
  109. teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
  110. teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
  111. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  112. teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
  113. teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
  114. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
  115. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  116. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
  117. teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
  118. teradataml/data/dwt2d_dataTable.csv +65 -0
  119. teradataml/data/dwt_dataTable.csv +8 -0
  120. teradataml/data/dwt_filterTable.csv +3 -0
  121. teradataml/data/finance_data4.csv +13 -0
  122. teradataml/data/glm_example.json +28 -1
  123. teradataml/data/grocery_transaction.csv +19 -0
  124. teradataml/data/housing_train_segment.csv +201 -0
  125. teradataml/data/idwt2d_dataTable.csv +5 -0
  126. teradataml/data/idwt_dataTable.csv +8 -0
  127. teradataml/data/idwt_filterTable.csv +3 -0
  128. teradataml/data/insect2Cols.csv +61 -0
  129. teradataml/data/interval_data.csv +5 -0
  130. teradataml/data/jsons/paired_functions.json +14 -0
  131. teradataml/data/jsons/sqle/17.20/TD_ANOVA.json +99 -27
  132. teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
  133. teradataml/data/jsons/sqle/17.20/TD_FTest.json +166 -83
  134. teradataml/data/jsons/sqle/17.20/TD_GLM.json +90 -14
  135. teradataml/data/jsons/sqle/17.20/TD_GLMPREDICT.json +48 -5
  136. teradataml/data/jsons/sqle/17.20/TD_GetFutileColumns.json +5 -3
  137. teradataml/data/jsons/sqle/17.20/TD_KMeans.json +31 -11
  138. teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
  139. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
  140. teradataml/data/jsons/sqle/17.20/TD_NonLinearCombineFit.json +3 -2
  141. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
  142. teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
  143. teradataml/data/jsons/sqle/17.20/TD_ROC.json +2 -1
  144. teradataml/data/jsons/sqle/17.20/TD_SVM.json +16 -16
  145. teradataml/data/jsons/sqle/17.20/TD_SVMPredict.json +19 -1
  146. teradataml/data/jsons/sqle/17.20/TD_ScaleFit.json +168 -15
  147. teradataml/data/jsons/sqle/17.20/TD_ScaleTransform.json +50 -1
  148. teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
  149. teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
  150. teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
  151. teradataml/data/jsons/sqle/17.20/TD_XGBoost.json +25 -7
  152. teradataml/data/jsons/sqle/17.20/TD_XGBoostPredict.json +17 -4
  153. teradataml/data/jsons/sqle/17.20/TD_ZTest.json +157 -80
  154. teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
  155. teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
  156. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
  157. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
  158. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
  159. teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
  160. teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
  161. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
  162. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
  163. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
  164. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
  165. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
  166. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
  167. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
  168. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
  169. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
  170. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
  171. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
  172. teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
  173. teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
  174. teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
  175. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
  176. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
  177. teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
  178. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
  179. teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
  180. teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
  181. teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
  182. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
  183. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
  184. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
  185. teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
  186. teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
  187. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
  188. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
  189. teradataml/data/jsons/uaf/17.20/TD_SAX.json +208 -0
  190. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
  191. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
  192. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
  193. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
  194. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
  195. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +400 -0
  196. teradataml/data/kmeans_example.json +5 -0
  197. teradataml/data/kmeans_table.csv +10 -0
  198. teradataml/data/load_example_data.py +8 -2
  199. teradataml/data/naivebayestextclassifier_example.json +1 -1
  200. teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
  201. teradataml/data/onehot_encoder_train.csv +4 -0
  202. teradataml/data/openml_example.json +29 -0
  203. teradataml/data/peppers.png +0 -0
  204. teradataml/data/real_values.csv +14 -0
  205. teradataml/data/sax_example.json +8 -0
  206. teradataml/data/scale_attributes.csv +3 -0
  207. teradataml/data/scale_example.json +52 -1
  208. teradataml/data/scale_input_part_sparse.csv +31 -0
  209. teradataml/data/scale_input_partitioned.csv +16 -0
  210. teradataml/data/scale_input_sparse.csv +11 -0
  211. teradataml/data/scale_parameters.csv +3 -0
  212. teradataml/data/scripts/deploy_script.py +21 -2
  213. teradataml/data/scripts/sklearn/sklearn_fit.py +40 -37
  214. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +22 -30
  215. teradataml/data/scripts/sklearn/sklearn_function.template +42 -24
  216. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +23 -33
  217. teradataml/data/scripts/sklearn/sklearn_neighbors.py +19 -28
  218. teradataml/data/scripts/sklearn/sklearn_score.py +32 -32
  219. teradataml/data/scripts/sklearn/sklearn_transform.py +85 -42
  220. teradataml/data/star_pivot.csv +8 -0
  221. teradataml/data/templates/open_source_ml.json +2 -1
  222. teradataml/data/teradataml_example.json +97 -1
  223. teradataml/data/timestamp_data.csv +4 -0
  224. teradataml/data/titanic_dataset_unpivoted.csv +19 -0
  225. teradataml/data/uaf_example.json +55 -1
  226. teradataml/data/unpivot_example.json +15 -0
  227. teradataml/data/url_data.csv +9 -0
  228. teradataml/data/windowdfft.csv +16 -0
  229. teradataml/data/ztest_example.json +16 -0
  230. teradataml/dataframe/copy_to.py +9 -4
  231. teradataml/dataframe/data_transfer.py +125 -64
  232. teradataml/dataframe/dataframe.py +575 -57
  233. teradataml/dataframe/dataframe_utils.py +47 -9
  234. teradataml/dataframe/fastload.py +273 -90
  235. teradataml/dataframe/functions.py +339 -0
  236. teradataml/dataframe/row.py +160 -0
  237. teradataml/dataframe/setop.py +2 -2
  238. teradataml/dataframe/sql.py +740 -18
  239. teradataml/dataframe/window.py +1 -1
  240. teradataml/dbutils/dbutils.py +324 -18
  241. teradataml/geospatial/geodataframe.py +1 -1
  242. teradataml/geospatial/geodataframecolumn.py +1 -1
  243. teradataml/hyperparameter_tuner/optimizer.py +13 -13
  244. teradataml/lib/aed_0_1.dll +0 -0
  245. teradataml/opensource/sklearn/_sklearn_wrapper.py +254 -122
  246. teradataml/options/__init__.py +16 -5
  247. teradataml/options/configure.py +39 -6
  248. teradataml/options/display.py +2 -2
  249. teradataml/plot/axis.py +4 -4
  250. teradataml/scriptmgmt/UserEnv.py +26 -19
  251. teradataml/scriptmgmt/lls_utils.py +120 -16
  252. teradataml/table_operators/Script.py +4 -5
  253. teradataml/table_operators/TableOperator.py +160 -26
  254. teradataml/table_operators/table_operator_util.py +88 -41
  255. teradataml/table_operators/templates/dataframe_udf.template +63 -0
  256. teradataml/telemetry_utils/__init__.py +0 -0
  257. teradataml/telemetry_utils/queryband.py +52 -0
  258. teradataml/utils/validators.py +41 -3
  259. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/METADATA +191 -6
  260. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/RECORD +263 -185
  261. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/WHEEL +0 -0
  262. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/top_level.txt +0 -0
  263. {teradataml-20.0.0.0.dist-info → teradataml-20.0.0.2.dist-info}/zip-safe +0 -0
@@ -143,5 +143,15 @@
  "smoker" : "varchar(4)",
  "region" : "varchar(12)",
  "charges" : "real"
+ },
+ "grocery_transaction":{
+ "tranid" : "integer",
+ "period" : "varchar(20)",
+ "storeid" : "integer",
+ "region" : "varchar(20)",
+ "item" : "varchar(20)",
+ "sku" : "integer",
+ "category" : "varchar(20)"
+
  }
  }
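
The "grocery_transaction" entry above registers the example dataset shipped in teradataml/data/grocery_transaction.csv, which feeds the new CFilter() function documented later in this diff. A minimal sketch of loading it, assuming a Vantage connection; host and credentials are placeholders:

    from teradataml import create_context, load_example_data, DataFrame

    # Placeholder connection details; replace with a real Vantage host and user.
    create_context(host="<host>", username="<user>", password="<password>")

    # Materialize the bundled example table and wrap it in a teradataml
    # DataFrame, mirroring the usage shown in the CFilter() docstring below.
    load_example_data("dataframe", ["grocery_transaction"])
    df = DataFrame.from_table("grocery_transaction")
    print(df)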
@@ -7,7 +7,9 @@ def OneHotEncodingFit(data=None, is_input_dense=None, target_column=None, catego
  Such as, target attributes and their categorical values to be encoded and other parameters.
  Output of OneHotEncodingFit() function is used by OneHotEncodingTransform() function for encoding
  the input data. It supports inputs in both sparse and dense format.
-
+ Note:
+ * For input to be considered as sparse input, column names must be provided for
+ 'data_partition_column' argument.
 
  PARAMETERS:
  data:
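
The added note makes the sparse-input contract explicit: the input only counts as sparse when column names are supplied for 'data_partition_column'. A rough sketch of a sparse-format call under that rule, using the new onehot_encoder_train.csv example data listed above; the column names are hypothetical and the argument list is abbreviated, not the function's full signature:

    from teradataml import DataFrame, OneHotEncodingFit

    # Hypothetical sparse-format training table with attribute/value pair columns.
    train = DataFrame.from_table("onehot_encoder_train")

    # Per the note, sparse input is recognized only when column names are
    # supplied for "data_partition_column"; "attribute" is a placeholder name.
    fit = OneHotEncodingFit(data=train,
                            is_input_dense=False,
                            target_column="attribute",
                            data_partition_column="attribute")
    print(fit.result)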
@@ -3,6 +3,12 @@ def OneHotEncodingTransform(data=None, object=None, is_input_dense=None, **gener
  DESCRIPTION:
  Function encodes specified attributes and categorical values as one-hot numeric vectors,
  using OneHotEncodingFit() function output.
+ Notes:
+ * In case of sparse input, neither 'data_partition_column' nor
+ 'object_partition_column' can be used independently.
+ * In case of dense input, if 'data_partition_column' has the value
+ PartitionKind.ANY, then 'object_partition_column' should have the value
+ PartitionKind.DIMENSION.
 
 
  PARAMETERS:
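
The dense-input note pairs PartitionKind.ANY on the data side with PartitionKind.DIMENSION on the fit-object side. A sketch of that pairing, continuing from the fit sketch above; the note references PartitionKind without showing its import, so the import path here is an assumption:

    # Assumed import location for PartitionKind.
    from teradataml import OneHotEncodingTransform, PartitionKind

    # Dense input: ANY partitioning of "data" pairs with DIMENSION
    # partitioning of the fit object, per the second note above.
    out = OneHotEncodingTransform(data=train,
                                  object=fit.result,
                                  is_input_dense=True,
                                  data_partition_column=PartitionKind.ANY,
                                  object_partition_column=PartitionKind.DIMENSION)
    print(out.result)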
@@ -5,7 +5,11 @@ def OutlierFilterTransform(data=None, object=None, **generic_arguments):
  OutlierFilterTransform() uses the result DataFrame from OutlierFilterFit() function to get
  statistics like median, count of rows, lower percentile and upper percentile for every column
  specified in target columns argument and filters the outliers in the input data.
-
+ Notes:
+ * Partitioning of input data and model is allowed using 'data_partition_column' and
+ 'object_partition_column' only if 'group_columns' are passed while creating model
+ using OutlierFilterFit() function.
+ * Neither 'data_partition_column' nor 'object_partition_column' can be used independently.
 
  PARAMETERS:
  data:
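
These notes tie partitioned transforms back to how the fit object was created. A sketch of the intended pairing, assuming the titanic example table used elsewhere in this diff; the OutlierFilterFit() arguments shown are illustrative rather than complete:

    from teradataml import DataFrame, OutlierFilterFit, OutlierFilterTransform, load_example_data

    load_example_data("teradataml", "titanic")
    titanic = DataFrame.from_table("titanic")

    # Fit with "group_columns" so a partitioned transform is permitted
    # (first note); "fare" and "pclass" are real titanic columns.
    fit = OutlierFilterFit(data=titanic,
                           target_columns="fare",
                           group_columns="pclass")

    # Both partition columns must be supplied together (second note).
    out = OutlierFilterTransform(data=titanic,
                                 object=fit.result,
                                 data_partition_column="pclass",
                                 object_partition_column="pclass")
    print(out.result)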
@@ -1,4 +1,6 @@
- def ANOVA(data=None, group_columns=None, alpha=0.05, **generic_arguments):
+ def ANOVA(data=None, group_columns=None, alpha=0.05, group_name_column=None,
+ group_value_column=None, group_names=None, num_groups=None,
+ **generic_arguments):
  """
  DESCRIPTION:
  The ANOVA() function performs one-way ANOVA (Analysis of Variance) on
@@ -37,6 +39,42 @@ def ANOVA(data=None, group_columns=None, alpha=0.05, **generic_arguments):
  Default Value: 0.05
  Types: float
 
+ group_name_column:
+ Optional Argument.
+ Specifies the column name in "data" containing the names of the groups
+ included in the computation.
+ Note:
+ * This argument is used when data contains group names in a column
+ and group values in another column.
+ * This argument must be used in conjunction with "group_value_column".
+ Types: str
+
+ group_value_column:
+ Optional Argument.
+ Specifies the column name in "data" containing the values for each group member.
+ Note:
+ * This argument is used when data contains group values in a column
+ and group names in another column.
+ * This argument must be used in conjunction with "group_name_column".
+ Types: str
+
+ group_names:
+ Optional Argument.
+ Specifies the names of the groups included in the computation.
+ Note:
+ * This argument is used when data contains group values in a column
+ and group names in another column.
+ Types: list of Strings (str)
+
+ num_groups:
+ Optional Argument.
+ Specifies the number of different groups in the "data" included
+ in the computation.
+ Note:
+ * This argument is used when data contains group values in a column
+ and group names in another column.
+ Types: int
+
  **generic_arguments:
  Specifies the generic keyword arguments SQLE functions accept. Below
  are the generic keyword arguments:
@@ -97,9 +135,11 @@ def ANOVA(data=None, group_columns=None, alpha=0.05, **generic_arguments):
 
  # Load the example data.
  load_example_data("teradataml", ["insect_sprays"])
+ load_example_data("ztest", 'insect2Cols')
 
  # Create teradataml DataFrame objects.
  insect_sprays = DataFrame.from_table("insect_sprays")
+ insect_gp = DataFrame.from_table("insect2Cols")
 
  # Check the list of available analytic functions.
  display_analytic_functions()
@@ -123,4 +163,24 @@ def ANOVA(data=None, group_columns=None, alpha=0.05, **generic_arguments):
  # Print the result DataFrame.
  print(ANOVA_out_2.result)
 
+ # Example 3 : Perform one-way anova analysis on a data set with more
+ # than two groups and group_name_column, group_value_column,
+ # group_names.
+ ANOVA_out_3 = ANOVA(data = insect_gp,
+ group_name_column='groupName',
+ group_value_column='groupValue',
+ group_names=['groupA', 'groupB', 'groupC'])
+
+ # Print the result DataFrame.
+ print(ANOVA_out_3.result)
+
+ # Example 4 : Perform one-way anova analysis on a data set with more
+ # than two groups and num_groups.
+ ANOVA_out_4 = ANOVA(data = insect_gp,
+ group_name_column='groupName',
+ group_value_column='groupValue',
+ num_groups=6)
+
+ # Print the result DataFrame.
+ print(ANOVA_out_4.result)
  """
@@ -0,0 +1,132 @@
+ def CFilter(data = None, target_column = None, transaction_id_columns = None,
+ partition_columns = None, max_distinct_items = 100,
+ **generic_arguments):
+
+ """
+ DESCRIPTION:
+ Function calculates several statistical measures of how likely
+ each pair of items is to be purchased together.
+
+ PARAMETERS:
+ data:
+ Required Argument.
+ Specifies the input teradataml DataFrame.
+ Types: teradataml DataFrame
+
+ target_column:
+ Required Argument.
+ Specifies name of the column from the "data" containing data for filtration.
+ Types: str
+
+ transaction_id_columns:
+ Required Argument.
+ Specifies the name of the columns in "data" containing transaction id that defines the groups of items listed
+ in the input columns that are purchased together.
+ Types: str OR list of Strings (str)
+
+ partition_columns:
+ Optional Argument.
+ Specifies the name of the column in "data" to partition the data on.
+ Types: str OR list of Strings (str)
+
+ max_distinct_items:
+ Optional Argument.
+ Specifies the maximum size of the item set.
+ Default Value: 100
+ Types: int
+
+ **generic_arguments:
+ Specifies the generic keyword arguments SQLE functions accept. Below
+ are the generic keyword arguments:
+ persist:
+ Optional Argument.
+ Specifies whether to persist the results of the
+ function in a table or not. When set to True,
+ results are persisted in a table; otherwise,
+ results are garbage collected at the end of the
+ session.
+ Default Value: False
+ Types: bool
+
+ volatile:
+ Optional Argument.
+ Specifies whether to put the results of the
+ function in a volatile table or not. When set to
+ True, results are stored in a volatile table,
+ otherwise not.
+ Default Value: False
+ Types: bool
+
+ Function allows the user to partition, hash, order or local
+ order the input data. These generic arguments are available
+ for each argument that accepts teradataml DataFrame as
+ input and can be accessed as:
+ * "<input_data_arg_name>_partition_column" accepts str or
+ list of str (Strings)
+ * "<input_data_arg_name>_hash_column" accepts str or list
+ of str (Strings)
+ * "<input_data_arg_name>_order_column" accepts str or list
+ of str (Strings)
+ * "local_order_<input_data_arg_name>" accepts boolean
+ Note:
+ These generic arguments are supported by teradataml if
+ the underlying SQL Engine function supports, else an
+ exception is raised.
+
+ RETURNS:
+ Instance of CFilter.
+ Output teradataml DataFrames can be accessed using attribute
+ references, such as CFilterObj.<attribute_name>.
+ Output teradataml DataFrame attribute name is:
+ result
+
+
+ RAISES:
+ TeradataMlException, TypeError, ValueError
+
+
+ EXAMPLES:
+ # Notes:
+ # 1. Get the connection to Vantage, before importing the
+ # function in user space.
+ # 2. User can import the function, if it is available on
+ # Vantage user is connected to.
+ # 3. To check the list of analytic functions available on
+ # Vantage user connected to, use
+ # "display_analytic_functions()".
+
+ # Load the example data.
+ load_example_data("dataframe", ["grocery_transaction"])
+
+ # Create teradataml DataFrame objects.
+ df = DataFrame.from_table("grocery_transaction")
+
+ # Check the list of available analytic functions.
+ display_analytic_functions()
+
+ # Import function CFilter.
+ from teradataml import CFilter
+
+ # Example 1: CFilter function to calculate the statistical measures
+ # of how likely each pair of items is to be purchased together, without
+ # specifying the partition_columns.
+ CFilter_out = CFilter(data=df,
+ target_column='item',
+ transaction_id_columns = 'tranid',
+ max_distinct_items=100)
+
+ # Print the result DataFrame.
+ print(CFilter_out.result)
+
+ # Example 2: CFilter function to calculate the statistical measures
+ # of how likely each pair of items is to be purchased together,
+ # specifying the partition_columns.
+ CFilter_out2 = CFilter(data=df,
+ target_column='item',
+ transaction_id_columns = 'tranid',
+ partition_columns='storeid',
+ max_distinct_items=100)
+
+ # Print the result DataFrame.
+ print(CFilter_out2.result)
+ """
@@ -22,6 +22,8 @@ def ColumnTransformer(input_data = None, bincode_fit_data = None, function_fit_d
  User must create the FIT dataframe before using the function and must be provided in the same order
  as in the training data sequence to transform the dataset. The FIT dataframe can have maximum of
  128 columns.
+ Note:
+ * ColumnTransformer() function works only with python 3.6 and above.
 
 
  PARAMETERS:
@@ -1,12 +1,8 @@
- def FTest(data = None, alpha = None,
- first_sample_variance=None,
- first_sample_column=None,
- df1=None,
- second_sample_variance=None,
- second_sample_column=None,
- df2=2,
- alternate_hypothesis='two-tailed',
- **generic_arguments):
+ def FTest(data = None, alpha = 0.05, first_sample_variance=None,
+ first_sample_column=None, df1=None, second_sample_variance=None,
+ second_sample_column=None, df2=2, alternate_hypothesis='two-tailed',
+ sample_name_column=None, sample_value_column=None, first_sample_name=None,
+ second_sample_name=None, **generic_arguments):
  """
  DESCRIPTION:
  The FTest() function performs an F-test, for which the test statistic follows an
@@ -27,47 +23,72 @@ def FTest(data = None, alpha = None,
 
  alpha:
  Optional Argument.
- Specifies the probability of rejecting the null hypothesis when it is true
- (value below which null hypothesis is rejected).
- "alpha" must be a numeric value in the range [0, 1].
+ Specifies the probability of rejecting the null
+ hypothesis when the null hypothesis is true.
+ Note:
+ * "alpha" must be a numeric value in the range [0, 1].
  Default Value: 0.05
  Types: float
 
  first_sample_column:
- Required if "first_sample_variance" is omitted, disallowed otherwise.
- Specifies the name of the input column that contains the data for the
- first sample population.
+ Optional Argument.
+ Specifies the first sample column in F-Test.
+ Note:
+ * This argument must be specified with "first_sample_variance" and "df1"
+ or allowed combination is "first_sample_column" with
+ "second_sample_variance" and "df2".
+ * This argument cannot be used in conjunction with "sample_name_column"
+ and "sample_value_column".
  Types: str
 
  first_sample_variance:
- Required if "first_sample_column" is omitted, disallowed otherwise.
- Specifies the variance of the first sample population.
+ Optional Argument.
+ Specifies the first sample variance.
+ Note:
+ * This argument must be specified with "first_sample_column" and "df1"
+ or other allowed combination is "second_sample_column" with
+ "first_sample_variance" and "df1".
  Types: float
 
  df1:
- Required if "first_sample_column" is omitted, disallowed otherwise.
+ Optional Argument.
  Specifies the degrees of freedom of the first sample.
+ Note:
+ * This argument must be specified with "first_sample_column" and
+ "first_sample_variance".
  Types: integer
 
  second_sample_column:
- Required if "second_sample_variance" is omitted, disallowed otherwise.
- Specifies the name of the input column that contains the data for the
- second sample population.
+ Optional Argument.
+ Specifies the second sample column in F-Test.
+ Note:
+ * This argument must be specified with "second_sample_variance" and "df2"
+ or allowed combination is "second_sample_column" with "first_sample_variance"
+ and "df1".
+ * This argument cannot be used in conjunction with "sample_name_column"
+ and "sample_value_column".
  Types: str
 
  second_sample_variance:
- Required if "second_sample_column" is omitted, disallowed otherwise.
- Specifies the variance of the second sample population.
+ Optional Argument.
+ Specifies the second sample variance.
+ Note:
+ * This argument must be specified with "second_sample_column" and "df2"
+ or allowed combination is "first_sample_column" with
+ "second_sample_variance" and df2.
  Types: float
 
  df2:
- Required if "second_sample_column" is omitted, disallowed otherwise.
- Specifies the degrees of freedom of the second sample.
+ Optional Argument.
+ Specifies the degree of freedom of the second sample.
+ Note:
+ * This argument must be specified with "second_sample_column" and
+ "second_sample_variance".
  Types: integer
 
  alternate_hypothesis:
  Optional Argument.
- Specifies the alternative hypothesis.
+ Specifies the alternate hypothesis.
  Permitted Values:
  * lower-tailed - Alternate hypothesis (H1): μ < μ0.
  * upper-tailed - Alternate hypothesis (H1): μ > μ0.
@@ -79,6 +100,27 @@ def FTest(data = None, alpha = None,
  Default Value: two-tailed
  Types: str
 
+ sample_name_column:
+ Optional Argument.
+ Specifies the column name in "data" containing the names of the samples
+ included in the F-Test.
+ Types: str
+
+ sample_value_column:
+ Optional Argument.
+ Specifies the column name in "data" containing the values for each sample member.
+ Types: str
+
+ first_sample_name:
+ Optional Argument.
+ Specifies the name of the first sample included in the F-Test.
+ Types: str
+
+ second_sample_name:
+ Optional Argument.
+ Specifies the name of the second sample included in the F-Test.
+ Types: str
+
  **generic_arguments:
  Specifies the generic keyword arguments SQLE functions accept.
  Below are the generic keyword arguments:
@@ -129,9 +171,11 @@ def FTest(data = None, alpha = None,
 
  # Load the example data.
  load_example_data("teradataml", "titanic")
+ load_example_data("ztest", 'insect2Cols')
 
  # Create teradataml DataFrame object.
  titanic_data = DataFrame.from_table("titanic")
+ insect_gp = DataFrame.from_table("insect2Cols")
 
  # Check the list of available analytic functions.
  display_analytic_functions()
@@ -158,4 +202,39 @@ def FTest(data = None, alpha = None,
 
  # Print the result DataFrame.
  print(obj.result)
+
+ # Example 3: Run FTest() with sample_name_column, sample_value_column,
+ # first_sample_name and second_sample_name.
+ obj = FTest(data=insect_gp,
+ sample_value_column='groupValue',
+ sample_name_column='groupName',
+ first_sample_name='groupE',
+ second_sample_name='groupC')
+
+ # Print the result DataFrame.
+ print(obj.result)
+
+ # Example 4: Run FTest() with sample_name_column, sample_value_column,
+ # first_sample_name, second_sample_variance and df2.
+ obj = FTest(data=insect_gp,
+ sample_value_column='groupValue',
+ sample_name_column='groupName',
+ first_sample_name='groupE',
+ second_sample_variance=100.0,
+ df2=25)
+
+ # Print the result DataFrame.
+ print(obj.result)
+
+ # Example 5: Run FTest() with sample_name_column, sample_value_column,
+ # second_sample_name and first_sample_variance.
+ obj = FTest(data=insect_gp,
+ sample_value_column='groupValue',
+ sample_name_column='groupName',
+ second_sample_name='groupC',
+ first_sample_variance=85.0,
+ df1=19)
+
+ # Print the result DataFrame.
+ print(obj.result)
  """
@@ -2,7 +2,9 @@ def GLM(formula=None, data=None, input_columns=None, response_column=None, famil
  iter_max=300, batch_size=10, lambda1=0.02, alpha=0.15,
  iter_num_no_change=50, tolerance=0.001, intercept=True, class_weights="0:1.0, 1:1.0",
  learning_rate=None, initial_eta=0.05, decay_rate=0.25, decay_steps=5, momentum=0.0,
- nesterov=True, local_sgd_iterations=0, **generic_arguments):
+ nesterov=True, local_sgd_iterations=0, stepwise_direction=None, max_steps_num=5,
+ initial_stepwise_columns=None, attribute_data=None, parameter_data=None, iteration_mode="BATCH",
+ partition_column=None, **generic_arguments):
  """
  DESCRIPTION:
  The generalized linear model (GLM) function performs regression and classification
@@ -252,6 +254,55 @@ def GLM(formula=None, data=None, input_columns=None, response_column=None, famil
  Default Value: 0
  Types: int
 
+ stepwise_direction:
+ Optional Argument.
+ Specifies the type of stepwise algorithm to be used.
+ Permitted Values: 'FORWARD', 'BACKWARD', 'BOTH', 'BIDIRECTIONAL'
+ Types: str
+
+ max_steps_num:
+ Optional Argument.
+ Specifies the maximum number of steps to be used for the Stepwise Algorithm.
+ Note:
+ * The "max_steps_num" must be in the range [1, 2147483647].
+ Default Value: 5
+ Types: int
+
+ attribute_data:
+ Optional Argument.
+ Specifies the teradataml DataFrame containing the attribute data.
+ Note:
+ * This is valid when "data_partition_column" argument is used.
+ Types: teradataml DataFrame
+
+ parameter_data:
+ Optional Argument.
+ Specifies the teradataml DataFrame containing the parameter data.
+ Note:
+ * This is valid when "data_partition_column" argument is used.
+ Types: teradataml DataFrame
+
+ iteration_mode:
+ Optional Argument.
+ Specifies the iteration mode.
+ Note:
+ * This is valid when "data_partition_column" argument is used.
+ Permitted Values: 'BATCH', 'EPOCH'
+ Default Value: 'BATCH'
+ Types: str
+
+ partition_column:
+ Optional Argument.
+ Specifies the column names of "data" on which to partition the input.
+ The name should be consistent with the "data_partition_column".
+ Note:
+ * If the "data_partition_column" is unicode with foreign language characters,
+ it is necessary to specify "partition_column" argument.
+ * Column range is not supported for "partition_column" argument.
+ * This is valid when "data_partition_column" argument is used.
+ Types: str
+
+
  **generic_arguments:
  Specifies the generic keyword arguments SQLE functions accept. Below
  are the generic keyword arguments:
@@ -377,4 +428,114 @@ def GLM(formula=None, data=None, input_columns=None, response_column=None, famil
  # Print the result DataFrame.
  print(GLM_out_2.result)
  print(GLM_out_2.output_data)
+
+ # Example 3 : Generate generalized linear model(GLM) using stepwise regression algorithm.
+ # This example uses the boston dataset and scales the data.
+ # Scaled data is used as input data to generate the GLM model.
+ # Loading the example data.
+ load_example_data("decisionforest", ["boston"])
+ load_example_data('glm', ['housing_train_segment', 'housing_train_parameter', 'housing_train_attribute'])
+
+ # Create teradataml DataFrame objects.
+ boston_df = DataFrame('boston')
+ housing_seg = DataFrame('housing_train_segment')
+ housing_parameter = DataFrame('housing_train_parameter')
+ housing_attribute = DataFrame('housing_train_attribute')
+
+ # Scaling the data.
+ # Scale "target_columns" with respect to 'STD' value of the column.
+ fit_obj = ScaleFit(data=boston_df,
+ target_columns=['crim','zn','indus','chas','nox','rm','age','dis','rad','tax','ptratio','black','lstat'],
+ scale_method="STD")
+
+ # Scale values specified in the input data using the fit data generated by the ScaleFit() function above.
+ obj = ScaleTransform(object=fit_obj.output,
+ data=boston_df,
+ accumulate=["id","medv"])
+
+ boston = obj.result
+
+ # Generate generalized linear model(GLM) using stepwise regression algorithm.
+ glm_1 = GLM(data=boston,
+ input_columns=['indus','chas','nox','rm'],
+ response_column='medv',
+ family='GAUSSIAN',
+ lambda1=0.02,
+ alpha=0.33,
+ batch_size=10,
+ learning_rate='optimal',
+ iter_max=36,
+ iter_num_no_change=100,
+ tolerance=0.0001,
+ initial_eta=0.02,
+ stepwise_direction='backward',
+ max_steps_num=10)
+
+ # Print the result DataFrame.
+ print(glm_1.result)
+
+ # Example 4 : Generate generalized linear model(GLM) using
+ # stepwise regression algorithm with initial_stepwise_columns.
+ glm_2 = GLM(data=boston,
+ input_columns=['crim','zn','indus','chas','nox','rm','age','dis','rad','tax','ptratio','black','lstat'],
+ response_column='medv',
+ family='GAUSSIAN',
+ lambda1=0.02,
+ alpha=0.33,
+ batch_size=10,
+ learning_rate='optimal',
+ iter_max=36,
+ iter_num_no_change=100,
+ tolerance=0.0001,
+ initial_eta=0.02,
+ stepwise_direction='bidirectional',
+ max_steps_num=10,
+ initial_stepwise_columns=['rad','tax']
+ )
+
+ # Print the result DataFrame.
+ print(glm_2.result)
+
+ # Example 5 : Generate generalized linear model(GLM) using partition by key.
+ glm_3 = GLM(data=housing_seg,
+ input_columns=['bedrooms', 'bathrms', 'stories', 'driveway', 'recroom', 'fullbase', 'gashw', 'airco'],
+ response_column='price',
+ family='GAUSSIAN',
+ batch_size=10,
+ iter_max=1000,
+ data_partition_column='partition_id'
+ )
+
+ # Print the result DataFrame.
+ print(glm_3.result)
+
+ # Example 6 : Generate generalized linear model(GLM) using partition by key with attribute data.
+ glm_4 = GLM(data=housing_seg,
+ input_columns=['bedrooms', 'bathrms', 'stories', 'driveway', 'recroom', 'fullbase', 'gashw', 'airco'],
+ response_column='price',
+ family='GAUSSIAN',
+ batch_size=10,
+ iter_max=1000,
+ data_partition_column='partition_id',
+ attribute_data = housing_attribute,
+ attribute_data_partition_column = 'partition_id'
+ )
+
+ # Print the result DataFrame.
+ print(glm_4.result)
+
+ # Example 7 : Generate generalized linear model(GLM) using partition by key with parameter data.
+ glm_5 = GLM(data=housing_seg,
+ input_columns=['bedrooms', 'bathrms', 'stories', 'driveway', 'recroom', 'fullbase', 'gashw', 'airco'],
+ response_column='homestyle',
+ family='binomial',
+ iter_max=1000,
+ data_partition_column='partition_id',
+ parameter_data = housing_parameter,
+ parameter_data_partition_column = 'partition_id'
+ )
+
+ # Print the result DataFrame.
+ print(glm_5.result)
+
  """