teradataml 20.0.0.1__py3-none-any.whl → 20.0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of teradataml might be problematic. Click here for more details.

Files changed (240) hide show
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/LICENSE.pdf +0 -0
  3. teradataml/README.md +306 -0
  4. teradataml/__init__.py +10 -3
  5. teradataml/_version.py +1 -1
  6. teradataml/analytics/__init__.py +3 -2
  7. teradataml/analytics/analytic_function_executor.py +299 -16
  8. teradataml/analytics/analytic_query_generator.py +92 -0
  9. teradataml/analytics/byom/__init__.py +3 -2
  10. teradataml/analytics/json_parser/metadata.py +13 -3
  11. teradataml/analytics/json_parser/utils.py +13 -6
  12. teradataml/analytics/meta_class.py +40 -1
  13. teradataml/analytics/sqle/DecisionTreePredict.py +1 -1
  14. teradataml/analytics/sqle/__init__.py +11 -2
  15. teradataml/analytics/table_operator/__init__.py +4 -3
  16. teradataml/analytics/uaf/__init__.py +21 -2
  17. teradataml/analytics/utils.py +66 -1
  18. teradataml/analytics/valib.py +1 -1
  19. teradataml/automl/__init__.py +1502 -323
  20. teradataml/automl/custom_json_utils.py +139 -61
  21. teradataml/automl/data_preparation.py +247 -307
  22. teradataml/automl/data_transformation.py +32 -12
  23. teradataml/automl/feature_engineering.py +325 -86
  24. teradataml/automl/model_evaluation.py +44 -35
  25. teradataml/automl/model_training.py +122 -153
  26. teradataml/catalog/byom.py +8 -8
  27. teradataml/clients/pkce_client.py +1 -1
  28. teradataml/common/__init__.py +2 -1
  29. teradataml/common/constants.py +72 -0
  30. teradataml/common/deprecations.py +13 -7
  31. teradataml/common/garbagecollector.py +152 -120
  32. teradataml/common/messagecodes.py +11 -2
  33. teradataml/common/messages.py +4 -1
  34. teradataml/common/sqlbundle.py +26 -4
  35. teradataml/common/utils.py +225 -14
  36. teradataml/common/wrapper_utils.py +1 -1
  37. teradataml/context/context.py +82 -2
  38. teradataml/data/SQL_Fundamentals.pdf +0 -0
  39. teradataml/data/complaints_test_tokenized.csv +353 -0
  40. teradataml/data/complaints_tokens_model.csv +348 -0
  41. teradataml/data/covid_confirm_sd.csv +83 -0
  42. teradataml/data/dataframe_example.json +27 -1
  43. teradataml/data/docs/sqle/docs_17_20/CFilter.py +132 -0
  44. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +162 -0
  45. teradataml/data/docs/sqle/docs_17_20/OutlierFilterFit.py +2 -0
  46. teradataml/data/docs/sqle/docs_17_20/Pivoting.py +279 -0
  47. teradataml/data/docs/sqle/docs_17_20/Shap.py +203 -0
  48. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +189 -0
  49. teradataml/data/docs/sqle/docs_17_20/TFIDF.py +142 -0
  50. teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
  51. teradataml/data/docs/sqle/docs_17_20/Unpivoting.py +216 -0
  52. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  53. teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -10
  54. teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +1 -1
  55. teradataml/data/docs/uaf/docs_17_20/ArimaForecast.py +35 -5
  56. teradataml/data/docs/uaf/docs_17_20/ArimaValidate.py +3 -1
  57. teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +293 -0
  58. teradataml/data/docs/uaf/docs_17_20/AutoArima.py +354 -0
  59. teradataml/data/docs/uaf/docs_17_20/BreuschGodfrey.py +3 -2
  60. teradataml/data/docs/uaf/docs_17_20/BreuschPaganGodfrey.py +1 -1
  61. teradataml/data/docs/uaf/docs_17_20/Convolve.py +13 -10
  62. teradataml/data/docs/uaf/docs_17_20/Convolve2.py +4 -1
  63. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  64. teradataml/data/docs/uaf/docs_17_20/CumulPeriodogram.py +5 -4
  65. teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +4 -4
  66. teradataml/data/docs/uaf/docs_17_20/DWT.py +235 -0
  67. teradataml/data/docs/uaf/docs_17_20/DWT2D.py +214 -0
  68. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
  69. teradataml/data/docs/uaf/docs_17_20/DurbinWatson.py +1 -1
  70. teradataml/data/docs/uaf/docs_17_20/ExtractResults.py +1 -1
  71. teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +160 -0
  72. teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +1 -1
  73. teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +9 -31
  74. teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +4 -2
  75. teradataml/data/docs/uaf/docs_17_20/IDFFT2.py +1 -8
  76. teradataml/data/docs/uaf/docs_17_20/IDWT.py +236 -0
  77. teradataml/data/docs/uaf/docs_17_20/IDWT2D.py +226 -0
  78. teradataml/data/docs/uaf/docs_17_20/IQR.py +134 -0
  79. teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
  80. teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +2 -2
  81. teradataml/data/docs/uaf/docs_17_20/MAMean.py +3 -3
  82. teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +297 -0
  83. teradataml/data/docs/uaf/docs_17_20/MatrixMultiply.py +15 -6
  84. teradataml/data/docs/uaf/docs_17_20/PACF.py +0 -1
  85. teradataml/data/docs/uaf/docs_17_20/Portman.py +2 -2
  86. teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
  87. teradataml/data/docs/uaf/docs_17_20/Resample.py +9 -1
  88. teradataml/data/docs/uaf/docs_17_20/SAX.py +246 -0
  89. teradataml/data/docs/uaf/docs_17_20/SeasonalNormalize.py +17 -10
  90. teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
  91. teradataml/data/docs/uaf/docs_17_20/WhitesGeneral.py +3 -1
  92. teradataml/data/docs/uaf/docs_17_20/WindowDFFT.py +368 -0
  93. teradataml/data/dwt2d_dataTable.csv +65 -0
  94. teradataml/data/dwt_dataTable.csv +8 -0
  95. teradataml/data/dwt_filterTable.csv +3 -0
  96. teradataml/data/finance_data4.csv +13 -0
  97. teradataml/data/grocery_transaction.csv +19 -0
  98. teradataml/data/idwt2d_dataTable.csv +5 -0
  99. teradataml/data/idwt_dataTable.csv +8 -0
  100. teradataml/data/idwt_filterTable.csv +3 -0
  101. teradataml/data/interval_data.csv +5 -0
  102. teradataml/data/jsons/paired_functions.json +14 -0
  103. teradataml/data/jsons/sqle/17.20/TD_CFilter.json +118 -0
  104. teradataml/data/jsons/sqle/17.20/TD_NaiveBayes.json +193 -0
  105. teradataml/data/jsons/sqle/17.20/TD_NaiveBayesPredict.json +212 -0
  106. teradataml/data/jsons/sqle/17.20/TD_OneClassSVM.json +9 -9
  107. teradataml/data/jsons/sqle/17.20/TD_Pivoting.json +280 -0
  108. teradataml/data/jsons/sqle/17.20/TD_Shap.json +222 -0
  109. teradataml/data/jsons/sqle/17.20/TD_TFIDF.json +162 -0
  110. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
  111. teradataml/data/jsons/sqle/17.20/TD_Unpivoting.json +235 -0
  112. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  113. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  114. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  115. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  116. teradataml/data/jsons/storedprocedure/17.20/TD_FILTERFACTORY1D.json +150 -0
  117. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  118. teradataml/data/jsons/uaf/17.20/TD_ACF.json +1 -18
  119. teradataml/data/jsons/uaf/17.20/TD_ARIMAESTIMATE.json +3 -16
  120. teradataml/data/jsons/uaf/17.20/TD_ARIMAFORECAST.json +0 -3
  121. teradataml/data/jsons/uaf/17.20/TD_ARIMAVALIDATE.json +5 -3
  122. teradataml/data/jsons/uaf/17.20/TD_ARIMAXESTIMATE.json +362 -0
  123. teradataml/data/jsons/uaf/17.20/TD_AUTOARIMA.json +469 -0
  124. teradataml/data/jsons/uaf/17.20/TD_BINARYMATRIXOP.json +0 -3
  125. teradataml/data/jsons/uaf/17.20/TD_BINARYSERIESOP.json +0 -2
  126. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_GODFREY.json +2 -1
  127. teradataml/data/jsons/uaf/17.20/TD_BREUSCH_PAGAN_GODFREY.json +2 -5
  128. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE.json +3 -6
  129. teradataml/data/jsons/uaf/17.20/TD_CONVOLVE2.json +1 -3
  130. teradataml/data/jsons/uaf/17.20/TD_CUMUL_PERIODOGRAM.json +0 -5
  131. teradataml/data/jsons/uaf/17.20/TD_DFFT.json +1 -4
  132. teradataml/data/jsons/uaf/17.20/TD_DFFT2.json +2 -7
  133. teradataml/data/jsons/uaf/17.20/TD_DFFT2CONV.json +1 -2
  134. teradataml/data/jsons/uaf/17.20/TD_DFFTCONV.json +0 -2
  135. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
  136. teradataml/data/jsons/uaf/17.20/TD_DTW.json +3 -6
  137. teradataml/data/jsons/uaf/17.20/TD_DWT.json +173 -0
  138. teradataml/data/jsons/uaf/17.20/TD_DWT2D.json +160 -0
  139. teradataml/data/jsons/uaf/17.20/TD_FITMETRICS.json +1 -1
  140. teradataml/data/jsons/uaf/17.20/TD_GOLDFELD_QUANDT.json +16 -30
  141. teradataml/data/jsons/uaf/17.20/{TD_HOLT_WINTERS_FORECAST.json → TD_HOLT_WINTERS_FORECASTER.json} +1 -2
  142. teradataml/data/jsons/uaf/17.20/TD_IDFFT2.json +1 -15
  143. teradataml/data/jsons/uaf/17.20/TD_IDWT.json +162 -0
  144. teradataml/data/jsons/uaf/17.20/TD_IDWT2D.json +149 -0
  145. teradataml/data/jsons/uaf/17.20/TD_IQR.json +117 -0
  146. teradataml/data/jsons/uaf/17.20/TD_LINEAR_REGR.json +1 -1
  147. teradataml/data/jsons/uaf/17.20/TD_LINESPEC.json +1 -1
  148. teradataml/data/jsons/uaf/17.20/TD_MAMEAN.json +1 -3
  149. teradataml/data/jsons/uaf/17.20/TD_MATRIX2IMAGE.json +209 -0
  150. teradataml/data/jsons/uaf/17.20/TD_PACF.json +2 -2
  151. teradataml/data/jsons/uaf/17.20/TD_POWERSPEC.json +5 -5
  152. teradataml/data/jsons/uaf/17.20/TD_RESAMPLE.json +48 -28
  153. teradataml/data/jsons/uaf/17.20/TD_SAX.json +210 -0
  154. teradataml/data/jsons/uaf/17.20/TD_SEASONALNORMALIZE.json +12 -6
  155. teradataml/data/jsons/uaf/17.20/TD_SIMPLEEXP.json +0 -1
  156. teradataml/data/jsons/uaf/17.20/TD_TRACKINGOP.json +8 -8
  157. teradataml/data/jsons/uaf/17.20/TD_UNDIFF.json +1 -1
  158. teradataml/data/jsons/uaf/17.20/TD_UNNORMALIZE.json +1 -1
  159. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +410 -0
  160. teradataml/data/load_example_data.py +8 -2
  161. teradataml/data/medical_readings.csv +101 -0
  162. teradataml/data/naivebayestextclassifier_example.json +1 -1
  163. teradataml/data/naivebayestextclassifierpredict_example.json +11 -0
  164. teradataml/data/patient_profile.csv +101 -0
  165. teradataml/data/peppers.png +0 -0
  166. teradataml/data/real_values.csv +14 -0
  167. teradataml/data/sax_example.json +8 -0
  168. teradataml/data/scripts/deploy_script.py +1 -1
  169. teradataml/data/scripts/lightgbm/dataset.template +157 -0
  170. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
  171. teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
  172. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
  173. teradataml/data/scripts/sklearn/sklearn_fit.py +194 -160
  174. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
  175. teradataml/data/scripts/sklearn/sklearn_function.template +34 -16
  176. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
  177. teradataml/data/scripts/sklearn/sklearn_neighbors.py +1 -1
  178. teradataml/data/scripts/sklearn/sklearn_score.py +12 -3
  179. teradataml/data/scripts/sklearn/sklearn_transform.py +162 -24
  180. teradataml/data/star_pivot.csv +8 -0
  181. teradataml/data/target_udt_data.csv +8 -0
  182. teradataml/data/templates/open_source_ml.json +3 -1
  183. teradataml/data/teradataml_example.json +20 -1
  184. teradataml/data/timestamp_data.csv +4 -0
  185. teradataml/data/titanic_dataset_unpivoted.csv +19 -0
  186. teradataml/data/uaf_example.json +55 -1
  187. teradataml/data/unpivot_example.json +15 -0
  188. teradataml/data/url_data.csv +9 -0
  189. teradataml/data/vectordistance_example.json +4 -0
  190. teradataml/data/windowdfft.csv +16 -0
  191. teradataml/dataframe/copy_to.py +1 -1
  192. teradataml/dataframe/data_transfer.py +5 -3
  193. teradataml/dataframe/dataframe.py +1002 -201
  194. teradataml/dataframe/fastload.py +3 -3
  195. teradataml/dataframe/functions.py +867 -0
  196. teradataml/dataframe/row.py +160 -0
  197. teradataml/dataframe/setop.py +2 -2
  198. teradataml/dataframe/sql.py +840 -33
  199. teradataml/dataframe/window.py +1 -1
  200. teradataml/dbutils/dbutils.py +878 -34
  201. teradataml/dbutils/filemgr.py +48 -1
  202. teradataml/geospatial/geodataframe.py +1 -1
  203. teradataml/geospatial/geodataframecolumn.py +1 -1
  204. teradataml/hyperparameter_tuner/optimizer.py +13 -13
  205. teradataml/lib/aed_0_1.dll +0 -0
  206. teradataml/opensource/__init__.py +1 -1
  207. teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
  208. teradataml/opensource/_lightgbm.py +950 -0
  209. teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
  210. teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
  211. teradataml/opensource/sklearn/__init__.py +0 -1
  212. teradataml/opensource/sklearn/_sklearn_wrapper.py +1019 -574
  213. teradataml/options/__init__.py +9 -23
  214. teradataml/options/configure.py +42 -4
  215. teradataml/options/display.py +2 -2
  216. teradataml/plot/axis.py +4 -4
  217. teradataml/scriptmgmt/UserEnv.py +13 -9
  218. teradataml/scriptmgmt/lls_utils.py +77 -23
  219. teradataml/store/__init__.py +13 -0
  220. teradataml/store/feature_store/__init__.py +0 -0
  221. teradataml/store/feature_store/constants.py +291 -0
  222. teradataml/store/feature_store/feature_store.py +2223 -0
  223. teradataml/store/feature_store/models.py +1505 -0
  224. teradataml/store/vector_store/__init__.py +1586 -0
  225. teradataml/table_operators/Script.py +2 -2
  226. teradataml/table_operators/TableOperator.py +106 -20
  227. teradataml/table_operators/query_generator.py +3 -0
  228. teradataml/table_operators/table_operator_query_generator.py +3 -1
  229. teradataml/table_operators/table_operator_util.py +102 -56
  230. teradataml/table_operators/templates/dataframe_register.template +69 -0
  231. teradataml/table_operators/templates/dataframe_udf.template +63 -0
  232. teradataml/telemetry_utils/__init__.py +0 -0
  233. teradataml/telemetry_utils/queryband.py +52 -0
  234. teradataml/utils/dtypes.py +4 -2
  235. teradataml/utils/validators.py +34 -2
  236. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +311 -3
  237. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +240 -157
  238. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
  239. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
  240. {teradataml-20.0.0.1.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
@@ -0,0 +1,235 @@
1
+ {
2
+ "json_schema_major_version": "1",
3
+ "json_schema_minor_version": "1",
4
+ "json_content_version": "1",
5
+ "function_name": "TD_Unpivoting",
6
+ "function_version": "1.0",
7
+ "function_alias_name": "TD_Unpivoting",
8
+ "function_type": "fastpath",
9
+ "function_category": "Feature Engineering Transform",
10
+ "function_r_name": "aa.td.unpivoting",
11
+ "short_description": "This function is used to unpivot the data i.e. change the data from dense format to sparse format.",
12
+ "long_description": "This function is used to unpivot the data i.e. change the data from dense format to sparse format.",
13
+ "input_tables": [
14
+ {
15
+ "requiredInputKind": [
16
+ "PartitionByAny"
17
+ ],
18
+ "isOrdered": false,
19
+ "partitionByOne": false,
20
+ "name": "InputTable",
21
+ "alternateNames": [],
22
+ "isRequired": true,
23
+ "rDescription": "Specifies the table containing the input data to be unpivoted.",
24
+ "description": "Specifies the table containing the input data to be unpivoted.",
25
+ "datatype": "TABLE_ALIAS",
26
+ "allowsLists": false,
27
+ "rName": "data",
28
+ "useInR": true,
29
+ "rOrderNum": 1
30
+ }
31
+ ],
32
+ "argument_clauses": [
33
+ {
34
+ "targetTable": [
35
+ "InputTable"
36
+ ],
37
+ "checkDuplicate": true,
38
+ "allowedTypes": [],
39
+ "allowedTypeGroups": [
40
+ "ALL"
41
+ ],
42
+ "matchLengthOfArgument": "",
43
+ "allowPadding": false,
44
+ "name": "IDColumn",
45
+ "alternateNames": [],
46
+ "isRequired": true,
47
+ "rDescription": "Specify the column which contains the input data identifier.",
48
+ "description": "Specify the column which contains the input data identifier.",
49
+ "datatype": "COLUMNS",
50
+ "allowsLists": false,
51
+ "rName": "id.column",
52
+ "useInR": true,
53
+ "rOrderNum": 2
54
+ },
55
+ {
56
+ "targetTable": [
57
+ "InputTable"
58
+ ],
59
+ "checkDuplicate": true,
60
+ "allowedTypes": [],
61
+ "allowedTypeGroups": [
62
+ "ALL"
63
+ ],
64
+ "matchLengthOfArgument": "",
65
+ "allowPadding": false,
66
+ "name": "TargetColumns",
67
+ "alternateNames": [],
68
+ "isRequired": true,
69
+ "rDescription": "Specify the columns from the input table which contain the data for unpivoting.",
70
+ "description": "Specify the columns from the input table which contain the data for unpivoting.",
71
+ "datatype": "COLUMNS",
72
+ "allowsLists": true,
73
+ "rName": "target.columns",
74
+ "useInR": true,
75
+ "rOrderNum": 3
76
+ },
77
+ {
78
+ "permittedValues": [],
79
+ "isOutputColumn": false,
80
+ "matchLengthOfArgument": "",
81
+ "allowPadding": false,
82
+ "name": "AttributeAliasList",
83
+ "alternateNames": [],
84
+ "isRequired": false,
85
+ "rDescription": "Specify alternate names for the values in the AttributeName output column.",
86
+ "description": "Specify alternate names for the values in the AttributeName output column.",
87
+ "datatype": "STRING",
88
+ "allowsLists": true,
89
+ "rName": "alias.names",
90
+ "useInR": true,
91
+ "rOrderNum": 4
92
+ },
93
+ {
94
+ "permittedValues": [],
95
+ "isOutputColumn": true,
96
+ "matchLengthOfArgument": "",
97
+ "allowPadding": false,
98
+ "defaultValue": "AttributeName",
99
+ "name": "AttributeColName",
100
+ "alternateNames": [],
101
+ "isRequired": false,
102
+ "rDescription": "Specify output column name for AttributeName column.",
103
+ "description": "Specify output column name for AttributeName column.",
104
+ "datatype": "STRING",
105
+ "allowsLists": false,
106
+ "rName": "attribute.column",
107
+ "useInR": true,
108
+ "rOrderNum": 5
109
+ },
110
+ {
111
+ "permittedValues": [],
112
+ "isOutputColumn": true,
113
+ "matchLengthOfArgument": "",
114
+ "allowPadding": false,
115
+ "defaultValue": "AttributeValue",
116
+ "name": "ValueColName",
117
+ "alternateNames": [],
118
+ "isRequired": false,
119
+ "rDescription": "Specify output column name for AttributeValue column.",
120
+ "description": "Specify output column name for AttributeValue column.",
121
+ "datatype": "STRING",
122
+ "allowsLists": false,
123
+ "rName": "value.column",
124
+ "useInR": true,
125
+ "rOrderNum": 6
126
+ },
127
+ {
128
+ "targetTable": [
129
+ "InputTable"
130
+ ],
131
+ "checkDuplicate": true,
132
+ "allowedTypes": [],
133
+ "allowedTypeGroups": [
134
+ "ALL"
135
+ ],
136
+ "matchLengthOfArgument": "",
137
+ "allowPadding": false,
138
+ "name": "Accumulate",
139
+ "alternateNames": [],
140
+ "isRequired": false,
141
+ "rDescription": "Specifies the input table columns to copy to the output table. By default, the function copies no input table columns to the output table.",
142
+ "description": "Specifies the input table columns to copy to the output table. By default, the function copies no input table columns to the output table.",
143
+ "datatype": "COLUMNS",
144
+ "allowsLists": true,
145
+ "rName": "accumulate",
146
+ "useInR": true,
147
+ "rOrderNum": 7
148
+ },
149
+ {
150
+ "permittedValues": [],
151
+ "isOutputColumn": false,
152
+ "matchLengthOfArgument": "",
153
+ "allowPadding": false,
154
+ "defaultValue": false,
155
+ "name": "IncludeNulls",
156
+ "alternateNames": [],
157
+ "isRequired": false,
158
+ "rDescription": "Specify whether or not to include nulls in the transformation.",
159
+ "description": "Specify whether or not to include nulls in the transformation.",
160
+ "datatype": "BOOLEAN",
161
+ "allowsLists": false,
162
+ "rName": "include.nulls",
163
+ "useInR": true,
164
+ "rOrderNum": 8
165
+ },
166
+ {
167
+ "permittedValues": [],
168
+ "isOutputColumn": false,
169
+ "matchLengthOfArgument": "",
170
+ "allowPadding": false,
171
+ "defaultValue": false,
172
+ "name": "InputTypes",
173
+ "alternateNames": [],
174
+ "isRequired": false,
175
+ "rDescription": "Specify true if, instead of one column for all attribute values, multiple columns corresponding to data type groups are needed.",
176
+ "description": "Specify true if, instead of one column for all attribute values, multiple columns corresponding to data type groups are needed.",
177
+ "datatype": "BOOLEAN",
178
+ "allowsLists": false,
179
+ "rName": "input.types",
180
+ "useInR": true,
181
+ "rOrderNum": 9
182
+ },
183
+ {
184
+ "permittedValues": [],
185
+ "isOutputColumn": false,
186
+ "matchLengthOfArgument": "",
187
+ "allowPadding": false,
188
+ "defaultValue": false,
189
+ "name": "OutputVarchar",
190
+ "alternateNames": [],
191
+ "isRequired": false,
192
+ "rDescription": "Specify true if there is a need to output the AttributeValue column in varchar format irrespective of its data type.",
193
+ "description": "Specify true if there is a need to output the AttributeValue column in varchar format irrespective of its data type.",
194
+ "datatype": "BOOLEAN",
195
+ "allowsLists": false,
196
+ "rName": "output.varchar",
197
+ "useInR": true,
198
+ "rOrderNum": 10
199
+ },
200
+ {
201
+ "permittedValues": [],
202
+ "isOutputColumn": false,
203
+ "matchLengthOfArgument": "",
204
+ "allowPadding": false,
205
+ "defaultValue": false,
206
+ "name": "IndexedAttribute",
207
+ "alternateNames": [],
208
+ "isRequired": false,
209
+ "rDescription": "Specify true if there is a need to output the column indexes instead of column names in the AttributeName column.",
210
+ "description": "Specify true if there is a need to output the column indexes instead of column names in the AttributeName column.",
211
+ "datatype": "BOOLEAN",
212
+ "allowsLists": false,
213
+ "rName": "indexed.attribute",
214
+ "useInR": true,
215
+ "rOrderNum": 11
216
+ },
217
+ {
218
+ "permittedValues": [],
219
+ "isOutputColumn": false,
220
+ "matchLengthOfArgument": "",
221
+ "allowPadding": false,
222
+ "defaultValue": false,
223
+ "name": "IncludeDataTypes",
224
+ "alternateNames": [],
225
+ "isRequired": false,
226
+ "rDescription": "Specify true to output the original data type name.",
227
+ "description": "Specify true to output the original data type name.",
228
+ "datatype": "BOOLEAN",
229
+ "allowsLists": false,
230
+ "rName": "include.datatypes",
231
+ "useInR": true,
232
+ "rOrderNum": 12
233
+ }
234
+ ]
235
+ }
@@ -0,0 +1,250 @@
1
+ {
2
+ "json_schema_major_version": "1",
3
+ "json_schema_minor_version": "1",
4
+ "json_content_version": "1",
5
+ "function_name": "TD_KMeans",
6
+ "function_version": "1.0",
7
+ "function_type": "fastpath",
8
+ "function_category": "Model Training",
9
+ "function_alias_name": "TD_KMeans",
10
+ "function_r_name": "aa.td_kmeans",
11
+ "short_description": "fastpath function to generate clustering model using KMeans algorithm.",
12
+ "long_description": "fastpath function to generate clustering model containing cluster centroids using KMeans algorithm.",
13
+ "input_tables": [
14
+ {
15
+ "requiredInputKind": [
16
+ "PartitionByAny"
17
+ ],
18
+ "isOrdered": false,
19
+ "partitionByOne": false,
20
+ "name": "InputTable",
21
+ "alternateNames": [],
22
+ "isRequired": true,
23
+ "rDescription": "The relation that contains input data.",
24
+ "description": "The relation that contains input data.",
25
+ "datatype": "TABLE_ALIAS",
26
+ "allowsLists": false,
27
+ "rName": "data",
28
+ "useInR": true,
29
+ "rOrderNum": 1
30
+ },
31
+ {
32
+ "requiredInputKind": [
33
+ "Dimension"
34
+ ],
35
+ "isOrdered": false,
36
+ "partitionByOne": false,
37
+ "name": "InitialCentroidsTable",
38
+ "alternateNames": [],
39
+ "isRequired": false,
40
+ "rDescription": "The relation that contains set of initial centroids.",
41
+ "description": "The relation that contains set of initial centroids.",
42
+ "datatype": "TABLE_ALIAS",
43
+ "allowsLists": false,
44
+ "rName": "centroids.table",
45
+ "useInR": true,
46
+ "rOrderNum": 2
47
+ }
48
+ ],
49
+ "output_tables": [
50
+ {
51
+ "isOutputTable": true,
52
+ "omitPossible": true,
53
+ "name": "ModelTable",
54
+ "alternateNames": [],
55
+ "isRequired": false,
56
+ "rDescription": "Specifies the name of the table in which the generated KMeans model can be stored.",
57
+ "description": "Specifies the name of the table in which the generated KMeans model can be stored.",
58
+ "datatype": "TABLE_NAME",
59
+ "allowsLists": false,
60
+ "rName": "model.table",
61
+ "useInR": true,
62
+ "rOrderNum": 3
63
+ }
64
+ ],
65
+ "argument_clauses": [
66
+ {
67
+ "targetTable": [
68
+ "InputTable"
69
+ ],
70
+ "checkDuplicate": true,
71
+ "allowedTypes": [],
72
+ "allowedTypeGroups": [
73
+ "ALL"
74
+ ],
75
+ "matchLengthOfArgument": "",
76
+ "allowPadding": false,
77
+ "name": "IdColumn",
78
+ "alternateNames": [],
79
+ "isRequired": true,
80
+ "rDescription": "Specifies the column which is the unique identifier of each input row.",
81
+ "description": "Specifies the column which is the unique identifier of each input row.",
82
+ "datatype": "COLUMNS",
83
+ "allowsLists": false,
84
+ "rName": "id.column",
85
+ "useInR": true,
86
+ "rOrderNum": 4
87
+ },
88
+ {
89
+ "targetTable": [
90
+ "InputTable"
91
+ ],
92
+ "checkDuplicate": true,
93
+ "allowedTypes": [],
94
+ "allowedTypeGroups": [
95
+ "NUMERIC","AIVECTOR","BYTE","VARBYTE"
96
+ ],
97
+ "matchLengthOfArgument": "",
98
+ "allowPadding": false,
99
+ "name": "TargetColumns",
100
+ "alternateNames": [],
101
+ "isRequired": true,
102
+ "rDescription": "Specifies the columns/features to be used to cluster the data.",
103
+ "description": "Specifies the columns/features to be used to cluster the data.",
104
+ "datatype": "COLUMNS",
105
+ "allowsLists": true,
106
+ "rName": "target.columns",
107
+ "useInR": true,
108
+ "rOrderNum": 5
109
+ },
110
+ {
111
+ "lowerBound": 1,
112
+ "upperBound": 2147483647,
113
+ "lowerBoundType": "EXCLUSIVE",
114
+ "upperBoundType": "INCLUSIVE",
115
+ "allowNaN": false,
116
+ "name": "NumClusters",
117
+ "alternateNames": [],
118
+ "isRequired": false,
119
+ "rDescription": "Specifies the number of clusters to be produced. This argument is not allowed with InitialCentroidsTable provided.",
120
+ "description": "Specifies the number of clusters to be produced. This argument is not allowed with InitialCentroidsTable provided.",
121
+ "datatype": "INTEGER",
122
+ "allowsLists": false,
123
+ "rName": "num.clusters",
124
+ "useInR": true,
125
+ "rOrderNum": 6
126
+ },
127
+ {
128
+ "lowerBound": 0,
129
+ "upperBound": 2147483647,
130
+ "lowerBoundType": "INCLUSIVE",
131
+ "upperBoundType": "INCLUSIVE",
132
+ "allowNaN": false,
133
+ "name": "Seed",
134
+ "alternateNames": [],
135
+ "isRequired": false,
136
+ "rDescription": "Specify the random seed the algorithm uses for repeatable results. The algorithm uses the seed to randomly sample the input table rows as initial clusters.",
137
+ "description": "Specify the random seed the algorithm uses for repeatable results. The algorithm uses the seed to randomly sample the input table rows as initial clusters.",
138
+ "datatype": "INTEGER",
139
+ "allowsLists": false,
140
+ "rName": "seed",
141
+ "useInR": true,
142
+ "rOrderNum": 7
143
+ },
144
+ {
145
+ "defaultValue": 0.0395,
146
+ "lowerBound": 0,
147
+ "upperBound": 1.797e+308,
148
+ "lowerBoundType": "INCLUSIVE",
149
+ "upperBoundType": "INCLUSIVE",
150
+ "allowNaN": false,
151
+ "name": "StopThreshold",
152
+ "alternateNames": [],
153
+ "isRequired": false,
154
+ "rDescription": "Specify the convergence threshold. When the centroids move by less than this amount, the algorithm has converged.",
155
+ "description": "Specify the convergence threshold. When the centroids move by less than this amount, the algorithm has converged.",
156
+ "datatype": "DOUBLE",
157
+ "allowsLists": false,
158
+ "rName": "threshold",
159
+ "useInR": true,
160
+ "rOrderNum": 8
161
+ },
162
+ {
163
+ "defaultValue": 10,
164
+ "lowerBound": 1,
165
+ "upperBound": 2147483647,
166
+ "lowerBoundType": "INCLUSIVE",
167
+ "upperBoundType": "INCLUSIVE",
168
+ "allowNaN": false,
169
+ "name": "MaxIterNum",
170
+ "alternateNames": [],
171
+ "isRequired": false,
172
+ "rDescription": "Specify the maximum number of iterations that the algorithm runs before quitting if the convergence threshold has not been met.",
173
+ "description": "Specify the maximum number of iterations that the algorithm runs before quitting if the convergence threshold has not been met.",
174
+ "datatype": "INTEGER",
175
+ "allowsLists": false,
176
+ "rName": "iter.max",
177
+ "useInR": true,
178
+ "rOrderNum": 9
179
+ },
180
+ {
181
+ "defaultValue": 1,
182
+ "lowerBound": 1,
183
+ "upperBound": 2147483647,
184
+ "lowerBoundType": "INCLUSIVE",
185
+ "upperBoundType": "INCLUSIVE",
186
+ "allowNaN": false,
187
+ "name": "NumInit",
188
+ "alternateNames": [],
189
+ "isRequired": false,
190
+ "rDescription": "Specifies the number of times the k-means algorithm will be run with different initial centroid seeds. The function will output the model having the least value of Total Within Cluster Squared Sum.",
191
+ "description": "Specifies the number of times the k-means algorithm will be run with different initial centroid seeds. The function will output the model having the least value of Total Within Cluster Squared Sum.",
192
+ "datatype": "INTEGER",
193
+ "allowsLists": false,
194
+ "rName": "num.init",
195
+ "useInR": true,
196
+ "rOrderNum": 10
197
+ },
198
+ {
199
+ "defaultValue": false,
200
+ "name": "OutputClusterAssignment",
201
+ "alternateNames": [],
202
+ "isRequired": false,
203
+ "rDescription": "Specifies whether to output Cluster Assignment.",
204
+ "description": "Specifies whether to output Cluster Assignment.",
205
+ "datatype": "BOOLEAN",
206
+ "allowsLists": false,
207
+ "rName": "output.cluster.assignment",
208
+ "useInR": true,
209
+ "rOrderNum": 11
210
+ },
211
+ {
212
+ "permittedValues": [
213
+ "RANDOM",
214
+ "KMEANS++"
215
+ ],
216
+ "defaultValue": "RANDOM",
217
+ "isOutputColumn": false,
218
+ "matchLengthOfArgument": "",
219
+ "allowPadding": false,
220
+ "name": "InitialCentroidsMethod",
221
+ "alternateNames": [],
222
+ "isRequired": false,
223
+ "rDescription": "Specifies the initialization method to be used for selecting initial set of centroids.",
224
+ "description": "Specifies the initialization method to be used for selecting initial set of centroids.",
225
+ "datatype": "STRING",
226
+ "allowsLists": false,
227
+ "rName": "initialcentroids.method",
228
+ "useInR": true,
229
+ "rOrderNum": 12
230
+ },
231
+ {
232
+ "defaultValue": 1,
233
+ "lowerBound": 1,
234
+ "upperBound": 4096,
235
+ "lowerBoundType": "INCLUSIVE",
236
+ "upperBoundType": "INCLUSIVE",
237
+ "allowNaN": false,
238
+ "name": "EmbeddingSize",
239
+ "alternateNames": [],
240
+ "isRequired": false,
241
+ "rDescription": "Specify the embedding size of the vectors.",
242
+ "description": "Specify the embedding size of the vectors.",
243
+ "datatype": "INTEGER",
244
+ "allowsLists": false,
245
+ "rName": "embedding.size",
246
+ "useInR": true,
247
+ "rOrderNum": 13
248
+ }
249
+ ]
250
+ }