teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (88)
  1. teradataml/LICENSE-3RD-PARTY.pdf +0 -0
  2. teradataml/README.md +196 -2
  3. teradataml/__init__.py +4 -0
  4. teradataml/_version.py +1 -1
  5. teradataml/analytics/analytic_function_executor.py +79 -4
  6. teradataml/analytics/json_parser/metadata.py +12 -3
  7. teradataml/analytics/json_parser/utils.py +7 -2
  8. teradataml/analytics/sqle/__init__.py +1 -0
  9. teradataml/analytics/table_operator/__init__.py +1 -1
  10. teradataml/analytics/uaf/__init__.py +1 -1
  11. teradataml/analytics/utils.py +4 -0
  12. teradataml/automl/data_preparation.py +3 -2
  13. teradataml/automl/feature_engineering.py +15 -7
  14. teradataml/automl/model_training.py +39 -33
  15. teradataml/common/__init__.py +2 -1
  16. teradataml/common/constants.py +35 -0
  17. teradataml/common/garbagecollector.py +2 -1
  18. teradataml/common/messagecodes.py +8 -2
  19. teradataml/common/messages.py +3 -1
  20. teradataml/common/sqlbundle.py +25 -3
  21. teradataml/common/utils.py +134 -9
  22. teradataml/context/context.py +20 -10
  23. teradataml/data/SQL_Fundamentals.pdf +0 -0
  24. teradataml/data/dataframe_example.json +18 -2
  25. teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
  26. teradataml/data/docs/sqle/docs_17_20/Shap.py +7 -1
  27. teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
  28. teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
  29. teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
  30. teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
  31. teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
  32. teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
  33. teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
  34. teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
  35. teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
  36. teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
  37. teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
  38. teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
  39. teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
  40. teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
  41. teradataml/data/medical_readings.csv +101 -0
  42. teradataml/data/patient_profile.csv +101 -0
  43. teradataml/data/scripts/lightgbm/dataset.template +157 -0
  44. teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
  45. teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
  46. teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
  47. teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
  48. teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
  49. teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
  50. teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
  51. teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
  52. teradataml/data/target_udt_data.csv +8 -0
  53. teradataml/data/templates/open_source_ml.json +3 -2
  54. teradataml/data/vectordistance_example.json +4 -0
  55. teradataml/dataframe/dataframe.py +543 -175
  56. teradataml/dataframe/functions.py +553 -25
  57. teradataml/dataframe/sql.py +184 -15
  58. teradataml/dbutils/dbutils.py +556 -18
  59. teradataml/dbutils/filemgr.py +48 -1
  60. teradataml/lib/aed_0_1.dll +0 -0
  61. teradataml/opensource/__init__.py +1 -1
  62. teradataml/opensource/{sklearn/_class.py → _class.py} +102 -17
  63. teradataml/opensource/_lightgbm.py +950 -0
  64. teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +1 -2
  65. teradataml/opensource/{sklearn/constants.py → constants.py} +13 -10
  66. teradataml/opensource/sklearn/__init__.py +0 -1
  67. teradataml/opensource/sklearn/_sklearn_wrapper.py +798 -438
  68. teradataml/options/__init__.py +7 -23
  69. teradataml/options/configure.py +29 -3
  70. teradataml/scriptmgmt/UserEnv.py +3 -3
  71. teradataml/scriptmgmt/lls_utils.py +74 -21
  72. teradataml/store/__init__.py +13 -0
  73. teradataml/store/feature_store/__init__.py +0 -0
  74. teradataml/store/feature_store/constants.py +291 -0
  75. teradataml/store/feature_store/feature_store.py +2223 -0
  76. teradataml/store/feature_store/models.py +1505 -0
  77. teradataml/store/vector_store/__init__.py +1586 -0
  78. teradataml/table_operators/query_generator.py +3 -0
  79. teradataml/table_operators/table_operator_query_generator.py +3 -1
  80. teradataml/table_operators/table_operator_util.py +37 -38
  81. teradataml/table_operators/templates/dataframe_register.template +69 -0
  82. teradataml/utils/dtypes.py +4 -2
  83. teradataml/utils/validators.py +33 -1
  84. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/METADATA +200 -5
  85. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/RECORD +88 -65
  86. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/WHEEL +0 -0
  87. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/top_level.txt +0 -0
  88. {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.3.dist-info}/zip-safe +0 -0
@@ -1,4 +1,4 @@
- def TextParser(data=None, object=None, text_column=None, covert_to_lowercase=True, stem_tokens=False,
+ def TextParser(data=None, object=None, text_column=None, convert_to_lowercase=True, stem_tokens=False,
                 remove_stopwords=False, accumulate=None, delimiter=" \t\n\f\r",
                 punctuation="!#$%&()*+,-./:;?@\^_`{|}~", token_col_name=None, **generic_arguments):
      """
@@ -38,7 +38,7 @@ def TextParser(data=None, object=None, text_column=None, covert_to_lowercase=Tru
          Specifies the name of the input data column whose contents are to be tokenized.
          Types: str

-     covert_to_lowercase:
+     convert_to_lowercase:
          Optional Argument.
          Specifies whether to convert the text in "text_column" to lowercase.
          Default Value: True
@@ -165,7 +165,7 @@ def TextParser(data=None, object=None, text_column=None, covert_to_lowercase=Tru
      # Example 2 : Convert words in "text_data" column into their root forms.
      TextParser_out = TextParser(data=complaints,
                                  text_column="text_data",
-                                 covert_to_lowercase=True,
+                                 convert_to_lowercase=True,
                                  stem_tokens=True)

      # Print the result DataFrame.
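Because the misspelled keyword is renamed outright with no alias left behind, code written against 20.0.0.2 that passes covert_to_lowercase will raise a TypeError under 20.0.0.3. A minimal migration sketch, assuming an active Vantage connection and the complaints example table referenced in the docstring (the example-data bundle name is taken from teradataml's TextParser documentation, not from this diff):

    from teradataml import TextParser, DataFrame, load_example_data

    load_example_data("textparser", ["complaints"])
    complaints = DataFrame("complaints")

    # 20.0.0.2 spelling: covert_to_lowercase=True (raises TypeError from 20.0.0.3 on).
    # 20.0.0.3 spelling: convert_to_lowercase=True.
    out = TextParser(data=complaints,
                     text_column="text_data",
                     convert_to_lowercase=True,
                     stem_tokens=True)
    print(out.result)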
@@ -0,0 +1,118 @@
+ def Image2Matrix(data=None,
+                  output='gray',
+                  **generic_arguments):
+     """
+     DESCRIPTION:
+         Image2Matrix() function converts an image to a matrix.
+         It converts JPEG or PNG images to matrixes with payload values being the pixel values.
+         Note:
+             * The image size cannot be greater than 16 MB.
+             * The image should not exceed 4,000,000 pixels.
+
+     PARAMETERS:
+         data:
+             Required Argument.
+             Specifies the teradataml DataFrame which has image details.
+             Types: Teradataml DataFrame
+
+         output:
+             Optional Argument.
+             Specifies the type of output matrix.
+             Default: 'gray'
+             Permitted Values:
+                 'gray': Converts the image to a grayscale matrix.
+                 'rgb': Converts the image to a RGB matrix.
+             Types: str
+
+         **generic_arguments:
+             Specifies the generic keyword arguments SQLE functions accept.
+             Below are the generic keyword arguments:
+                 persist:
+                     Optional Argument.
+                     Specifies whether to persist the results of the function in table or not.
+                     When set to True, results are persisted in table; otherwise, results
+                     are garbage collected at the end of the session.
+                     Default Value: False
+                     Types: boolean
+
+                 volatile:
+                     Optional Argument.
+                     Specifies whether to put the results of the function in volatile table or not.
+                     When set to True, results are stored in volatile table, otherwise not.
+                     Default Value: False
+                     Types: boolean
+
+             Function allows the user to partition, hash, order or local order the input
+             data. These generic arguments are available for each argument that accepts
+             teradataml DataFrame as input and can be accessed as:
+                 * "<input_data_arg_name>_partition_column" accepts str or list of str (Strings)
+                 * "<input_data_arg_name>_hash_column" accepts str or list of str (Strings)
+                 * "<input_data_arg_name>_order_column" accepts str or list of str (Strings)
+                 * "local_order_<input_data_arg_name>" accepts boolean
+             Note:
+                 These generic arguments are supported by teradataml if the underlying Analytic Database
+                 function supports, else an exception is raised.
+
+     RETURNS:
+         Instance of Image2Matrix.
+         Output teradataml DataFrames can be accessed using attribute
+         references, such as Image2Matrix.<attribute_name>.
+         Output teradataml DataFrame attribute name is:
+             result
+
+     RAISES:
+         TeradataMlException, TypeError, ValueError
+
+     EXAMPLES:
+         # Notes:
+         #     1. Get the connection to Vantage, before importing the
+         #        function in user space.
+         #     2. User can import the function, if it is available on
+         #        Vantage user is connected to.
+         #     3. To check the list of UAF analytic functions available
+         #        on Vantage user connected to, use
+         #        "display_analytic_functions()".
+
+         # Check the list of available analytic functions.
+         display_analytic_functions()
+
+         # Import function Image2Matrix.
+         from teradataml import Image2Matrix
+         import teradataml
+
+         # Drop the image table if it is present.
+         try:
+             db_drop_table('imageTable')
+         except:
+             pass
+
+         # Create a table to store the image data.
+         execute_sql('CREATE TABLE imageTable(id INTEGER, image BLOB);')
+
+         # Load the image data into the fileContent variable.
+         file_dir = os.path.join(os.path.dirname(teradataml.__file__), "data")
+         with open(os.path.join(file_dir,'peppers.png'), mode='rb') as file:
+             fileContent = file.read()
+
+         # Insert the image data into the table.
+         sql = 'INSERT INTO imageTable VALUES(?, ?);'
+         parameters = (1, fileContent)
+         execute_sql(sql, parameters)
+
+         # Create a DataFrame for the image table.
+         imageTable = DataFrame('imageTable')
+
+         # Example 1: Convert the image to matrix with gray values.
+         image2matrix = Image2Matrix(data=imageTable.select(['id', 'image']),
+                                     output='gray')
+
+         # Print the result DataFrame.
+         print(image2matrix.result)
+
+         # Example 2: Convert the image to matrix with rgb values.
+         image2matrix2 = Image2Matrix(data=imageTable.select(['id', 'image']),
+                                      output='rgb')
+
+         # Print the result DataFrame.
+         print(image2matrix2.result)
+     """
@@ -0,0 +1,145 @@
+ def CopyArt(data=None, database_name = None,
+             table_name = None, map_name = None,
+             **generic_arguments):
+     """
+     DESCRIPTION:
+         CopyArt() function creates a copy of an existing analytics result table (ART).
+
+     PARAMETERS:
+         data:
+             Required Argument.
+             Specifies the ART data to be copied.
+             Types: DataFrame
+
+         database_name:
+             Required Argument.
+             Specifies the name of the destination database for copied ART.
+             Types: str
+
+         table_name:
+             Required Argument.
+             Specifies the name of the destination table for copied ART.
+             Types: str
+
+         map_name:
+             Optional Argument.
+             Specifies the name of the map for the destination ART.
+             By default, it refers to the map of the 'data'.
+             Types: str
+
+         **generic_arguments:
+             Specifies the generic keyword arguments of UAF functions.
+             Below are the generic keyword arguments:
+                 persist:
+                     Optional Argument.
+                     Specifies whether to persist the results of the
+                     function in a table or not. When set to True,
+                     results are persisted in a table; otherwise,
+                     results are garbage collected at the end of the
+                     session.
+                     Note that, when UAF function is executed, an
+                     analytic result table (ART) is created.
+                     Default Value: False
+                     Types: bool
+
+                 volatile:
+                     Optional Argument.
+                     Specifies whether to put the results of the
+                     function in a volatile ART or not. When set to
+                     True, results are stored in a volatile ART,
+                     otherwise not.
+                     Default Value: False
+                     Types: bool
+
+                 output_table_name:
+                     Optional Argument.
+                     Specifies the name of the table to store results.
+                     If not specified, a unique table name is internally
+                     generated.
+                     Types: str
+
+                 output_db_name:
+                     Optional Argument.
+                     Specifies the name of the database to create output
+                     table into. If not specified, table is created into
+                     database specified by the user at the time of context
+                     creation or configuration parameter. Argument is ignored,
+                     if "output_table_name" is not specified.
+                     Types: str
+
+     RETURNS:
+         Instance of CopyArt.
+         Output teradataml DataFrames can be accessed using attribute
+         references, such as obj.<attribute_name>.
+         Output teradataml DataFrame attribute name is:
+             1. result
+
+     RAISES:
+         TeradataMlException, TypeError, ValueError
+
+     EXAMPLES:
+         # Notes:
+         #     1. Get the connection to Vantage, before importing the
+         #        function in user space.
+         #     2. User can import the function, if it is available on
+         #        Vantage user is connected to.
+         #     3. To check the list of UAF analytic functions available
+         #        on Vantage user connected to, use
+         #        "display_analytic_functions()".
+
+         # Check the list of available UAF analytic functions.
+         display_analytic_functions(type="UAF")
+
+         # Import function CopyArt.
+         from teradataml import CopyArt, AutoArima
+
+         # Load the example data.
+         load_example_data("uaf", ["blood2ageandweight"])
+
+         # Create teradataml DataFrame object.
+         data = DataFrame.from_table("blood2ageandweight")
+
+         # Create teradataml TDSeries object.
+         data_series_df = TDSeries(data=data,
+                                   id="PatientID",
+                                   row_index="SeqNo",
+                                   row_index_style="SEQUENCE",
+                                   payload_field="BloodFat",
+                                   payload_content="REAL")
+
+         # Execute AutoArima function to create ART.
+         uaf_out = AutoArima(data=data_series_df,
+                             start_pq_nonseasonal=[1, 1],
+                             seasonal=False,
+                             constant=True,
+                             algorithm="MLE",
+                             fit_percentage=80,
+                             stepwise=True,
+                             nmodels=7,
+                             fit_metrics=True,
+                             residuals=True)
+
+         # Example 1: Execute CopyArt function to copy ART to a destination table name
+         #            with persist option.
+         res = CopyArt(data=uaf_out.result,
+                       database_name="alice",
+                       table_name="copied_table",
+                       persist=True)
+         print(res.result)
+
+         # Example 2: Execute CopyArt function to copy ART to a destination table name.
+         res = CopyArt(data=uaf_out.result,
+                       database_name="alice",
+                       table_name="copied_table2")
+
+         # Print the result DataFrame.
+         print(res.result)
+
+         # Example 3: Copy ART to a destination table name using uaf object.
+         res = uaf_out.copy(database_name="alice",
+                            table_name="copied_table3")
+
+         # Print the result DataFrame.
+         print(res.result)
+
+     """
@@ -1,5 +1,5 @@
  def DickeyFuller(data=None, data_filter_expr=None, algorithm=None,
-                  max_lags=None, drift_trend_formula=None,
+                  max_lags=0,
                   **generic_arguments):
      """
      DESCRIPTION:
@@ -10,18 +10,23 @@ def DickeyFuller(data=None, data_filter_expr=None, algorithm=None,
          other factors.

          The following procedure is an example of how to use DickeyFuller() function:
-             * Run regression tests.
-             * Determine the algorithm for Dickey Fuller statistic data.
-             * Run DickeyFuller() function using the algorithm.
-             * (Result shows series contains unit roots) Use DIFF() and
-               SeasonalNormalize() functions to remove unit roots.
+             * Run DickeyFuller() on the time series being modeled.
+             * Retrieve the results of the DickeyFuller() test to determine if the
+               time series contains any unit roots.
+             * If unit roots are present, use a technique such as differencing such as Diff()
+               or seasonal normalization, such as SeasonalNormalize(), to create a new series,
+               then rerun the DickeyFuller() test to verify that the differenced or
+               seasonally-normalized series unit root are removed.
+             * If the result shows unit roots, use Diff() and SeasonalNormalize()
+               to remove unit roots.


      PARAMETERS:
          data:
              Required Argument.
-             Speciifes a single logical-runtime series as an input.
-             Types: TDSeries
+             Specifies a single logical-runtime series as an input or TDAnalyticResult which
+             contains ARTFITRESIDUALS layer.
+             Types: TDSeries, TDAnalyticResult

          data_filter_expr:
              Optional Argument.
@@ -34,26 +39,18 @@ def DickeyFuller(data=None, data_filter_expr=None, algorithm=None,
              Permitted Values:
                  * NONE: Random walk
                  * DRIFT: Random walk with drift
-                 * TREND: Random walk with linear trend
                  * DRIFTNTREND: Random walk with drift and trend
-                 * FORMULA: Random walk with selected drift, trend and
-                   auxiliary lags
+                 * SQUARED: Random walk with drift, trend, and
+                   quadratic trend.
              Types: str

          max_lags:
              Optional Argument.
              Specifies the maximum number of lags to use with the regression
-             equation.
+             equation. Range is [0, 100]
+             DefaultValue: 0
              Types: int

-         drift_trend_formula:
-             Optional Argument.
-             Specifies the formula used to represent the drift and trend portions
-             of the regression.
-             Note:
-                 * Valid only when "algorithm" is set to 'formula'.
-             Types: str
-
          **generic_arguments:
              Specifies the generic keyword arguments of UAF functions.
              Below are the generic keyword arguments:
@@ -136,7 +133,7 @@ def DickeyFuller(data=None, data_filter_expr=None, algorithm=None,
          # for the presence of the unit roots using random walk with
          # linear trend for regression.
          uaf_out = DickeyFuller(data=data_series_df,
-                                algorithm='TREND')
+                                algorithm='DRIFT')

          # Print the result DataFrame.
          print(uaf_out.result)
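Two call-site changes fall out of these hunks: drift_trend_formula is gone (together with the 'TREND' and 'FORMULA' algorithm values, with 'SQUARED' added), and max_lags now defaults to 0 with a documented range of [0, 100]. A migration sketch against the new signature; the table and column names below are placeholders, not taken from this diff:

    from teradataml import DickeyFuller, TDSeries, DataFrame

    # Hypothetical series definition; substitute a real table and columns.
    series = TDSeries(data=DataFrame("my_timeseries"),
                      id="series_id",
                      row_index="seq_no",
                      row_index_style="SEQUENCE",
                      payload_field="magnitude",
                      payload_content="REAL")

    # 20.0.0.2 call that no longer validates:
    #   DickeyFuller(data=series, algorithm='TREND', drift_trend_formula=...)
    uaf_out = DickeyFuller(data=series,
                           algorithm="DRIFT",  # NONE, DRIFT, DRIFTNTREND or SQUARED
                           max_lags=2)         # optional; defaults to 0
    print(uaf_out.result)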
@@ -80,7 +80,7 @@
      "description": "Specifies whether to convert input text to lowercase.",
      "datatype": "BOOLEAN",
      "allowsLists": false,
-     "rName": "covert.to.lowercase",
+     "rName": "convert.to.lowercase",
      "useInR": true,
      "rOrderNum": 4
  },
@@ -0,0 +1,250 @@
+ {
+   "json_schema_major_version": "1",
+   "json_schema_minor_version": "1",
+   "json_content_version": "1",
+   "function_name": "TD_KMeans",
+   "function_version": "1.0",
+   "function_type": "fastpath",
+   "function_category": "Model Training",
+   "function_alias_name": "TD_KMeans",
+   "function_r_name": "aa.td_kmeans",
+   "short_description": "fastpath function to generate clustering model using KMeans algorithm.",
+   "long_description": "fastpath function to generate clustering model containing cluster centroids using KMeans algorithm.",
+   "input_tables": [
+     {
+       "requiredInputKind": [
+         "PartitionByAny"
+       ],
+       "isOrdered": false,
+       "partitionByOne": false,
+       "name": "InputTable",
+       "alternateNames": [],
+       "isRequired": true,
+       "rDescription": "The relation that contains input data.",
+       "description": "The relation that contains input data.",
+       "datatype": "TABLE_ALIAS",
+       "allowsLists": false,
+       "rName": "data",
+       "useInR": true,
+       "rOrderNum": 1
+     },
+     {
+       "requiredInputKind": [
+         "Dimension"
+       ],
+       "isOrdered": false,
+       "partitionByOne": false,
+       "name": "InitialCentroidsTable",
+       "alternateNames": [],
+       "isRequired": false,
+       "rDescription": "The relation that contains set of initial centroids.",
+       "description": "The relation that contains set of initial centroids.",
+       "datatype": "TABLE_ALIAS",
+       "allowsLists": false,
+       "rName": "centroids.table",
+       "useInR": true,
+       "rOrderNum": 2
+     }
+   ],
+   "output_tables": [
+     {
+       "isOutputTable": true,
+       "omitPossible": true,
+       "name": "ModelTable",
+       "alternateNames": [],
+       "isRequired": false,
+       "rDescription": "Specifies the name of the table in which the generated KMeans model can be stored.",
+       "description": "Specifies the name of the table in which the generated KMeans model can be stored.",
+       "datatype": "TABLE_NAME",
+       "allowsLists": false,
+       "rName": "model.table",
+       "useInR": true,
+       "rOrderNum": 3
+     }
+   ],
+   "argument_clauses": [
+     {
+       "targetTable": [
+         "InputTable"
+       ],
+       "checkDuplicate": true,
+       "allowedTypes": [],
+       "allowedTypeGroups": [
+         "ALL"
+       ],
+       "matchLengthOfArgument": "",
+       "allowPadding": false,
+       "name": "IdColumn",
+       "alternateNames": [],
+       "isRequired": true,
+       "rDescription": "Specifies the column which is unique identifier of input row.",
+       "description": "Specifies the column which is unique identifier of input row.",
+       "datatype": "COLUMNS",
+       "allowsLists": false,
+       "rName": "id.column",
+       "useInR": true,
+       "rOrderNum": 4
+     },
+     {
+       "targetTable": [
+         "InputTable"
+       ],
+       "checkDuplicate": true,
+       "allowedTypes": [],
+       "allowedTypeGroups": [
+         "NUMERIC","AIVECTOR","BYTE","VARBYTE"
+       ],
+       "matchLengthOfArgument": "",
+       "allowPadding": false,
+       "name": "TargetColumns",
+       "alternateNames": [],
+       "isRequired": true,
+       "rDescription": "Specifies the columns/features to be used to cluster the data.",
+       "description": "Specifies the columns/features to be used to cluster the data.",
+       "datatype": "COLUMNS",
+       "allowsLists": true,
+       "rName": "target.columns",
+       "useInR": true,
+       "rOrderNum": 5
+     },
+     {
+       "lowerBound": 1,
+       "upperBound": 2147483647,
+       "lowerBoundType": "EXCLUSIVE",
+       "upperBoundType": "INCLUSIVE",
+       "allowNaN": false,
+       "name": "NumClusters",
+       "alternateNames": [],
+       "isRequired": false,
+       "rDescription": "Specifies the number of clusters to be produced. This argument is not allowed with InitialCentroidsTable provided.",
+       "description": "Specifies the number of clusters to be produced. This argument is not allowed with InitialCentroidsTable provided.",
+       "datatype": "INTEGER",
+       "allowsLists": false,
+       "rName": "num.clusters",
+       "useInR": true,
+       "rOrderNum": 6
+     },
+     {
+       "lowerBound": 0,
+       "upperBound": 2147483647,
+       "lowerBoundType": "INCLUSIVE",
+       "upperBoundType": "INCLUSIVE",
+       "allowNaN": false,
+       "name": "Seed",
+       "alternateNames": [],
+       "isRequired": false,
+       "rDescription": "Specify the random seed the algorithm uses for repeatable results. The algorithm uses the seed to randomly sample the input table rows as initial clusters.",
+       "description": "Specify the random seed the algorithm uses for repeatable results. The algorithm uses the seed to randomly sample the input table rows as initial clusters.",
+       "datatype": "INTEGER",
+       "allowsLists": false,
+       "rName": "seed",
+       "useInR": true,
+       "rOrderNum": 7
+     },
+     {
+       "defaultValue": 0.0395,
+       "lowerBound": 0,
+       "upperBound": 1.797e+308,
+       "lowerBoundType": "INCLUSIVE",
+       "upperBoundType": "INCLUSIVE",
+       "allowNaN": false,
+       "name": "StopThreshold",
+       "alternateNames": [],
+       "isRequired": false,
+       "rDescription": "Specify the convergence threshold. When the centroids move by less than this amount, the algorithm has converged.",
+       "description": "Specify the convergence threshold. When the centroids move by less than this amount, the algorithm has converged.",
+       "datatype": "DOUBLE",
+       "allowsLists": false,
+       "rName": "threshold",
+       "useInR": true,
+       "rOrderNum": 8
+     },
+     {
+       "defaultValue": 10,
+       "lowerBound": 1,
+       "upperBound": 2147483647,
+       "lowerBoundType": "INCLUSIVE",
+       "upperBoundType": "INCLUSIVE",
+       "allowNaN": false,
+       "name": "MaxIterNum",
+       "alternateNames": [],
+       "isRequired": false,
+       "rDescription": "Specify the maximum number of iterations that the algorithm runs before quitting if the convergence threshold has not been met.",
+       "description": "Specify the maximum number of iterations that the algorithm runs before quitting if the convergence threshold has not been met.",
+       "datatype": "INTEGER",
+       "allowsLists": false,
+       "rName": "iter.max",
+       "useInR": true,
+       "rOrderNum": 9
+     },
+     {
+       "defaultValue": 1,
+       "lowerBound": 1,
+       "upperBound": 2147483647,
+       "lowerBoundType": "INCLUSIVE",
+       "upperBoundType": "INCLUSIVE",
+       "allowNaN": false,
+       "name": "NumInit",
+       "alternateNames": [],
+       "isRequired": false,
+       "rDescription": "The number of times, the k-means algorithm will be run with different initial centroid seeds. The function will emit out the model having the least value of Total Within Cluster Squared Sum.",
+       "description": "The number of times, the k-means algorithm will be run with different initial centroid seeds. The function will emit out the model having the least value of Total Within Cluster Squared Sum.",
+       "datatype": "INTEGER",
+       "allowsLists": false,
+       "rName": "num.init",
+       "useInR": true,
+       "rOrderNum": 10
+     },
+     {
+       "defaultValue": false,
+       "name": "OutputClusterAssignment",
+       "alternateNames": [],
+       "isRequired": false,
+       "rDescription": "Specifies whether to output Cluster Assignment.",
+       "description": "Specifies whether to output Cluster Assignment.",
+       "datatype": "BOOLEAN",
+       "allowsLists": false,
+       "rName": "output.cluster.assignment",
+       "useInR": true,
+       "rOrderNum": 11
+     },
+     {
+       "permittedValues": [
+         "RANDOM",
+         "KMEANS++"
+       ],
+       "defaultValue": "RANDOM",
+       "isOutputColumn": false,
+       "matchLengthOfArgument": "",
+       "allowPadding": false,
+       "name": "InitialCentroidsMethod",
+       "alternateNames": [],
+       "isRequired": false,
+       "rDescription": "Specifies the initialization method to be used for selecting initial set of centroids.",
+       "description": "Specifies the initialization method to be used for selecting initial set of centroids.",
+       "datatype": "STRING",
+       "allowsLists": false,
+       "rName": "initialcentroids.method",
+       "useInR": true,
+       "rOrderNum": 12
+     },
+     {
+       "defaultValue": 1,
+       "lowerBound": 1,
+       "upperBound": 4096,
+       "lowerBoundType": "INCLUSIVE",
+       "upperBoundType": "INCLUSIVE",
+       "allowNaN": false,
+       "name": "EmbeddingSize",
+       "alternateNames": [],
+       "isRequired": false,
+       "rDescription": "Specify the embedding size of the vectors.",
+       "description": "Specify the embedding size of the vectors.",
+       "datatype": "INTEGER",
+       "allowsLists": false,
+       "rName": "embedding.size",
+       "useInR": true,
+       "rOrderNum": 13
+     }
+   ]
+ }
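On the Python side teradataml generates its SQLE wrappers from these JSON definitions, with keyword names derived from each argument's rName (dots become underscores: id.column becomes id_column, iter.max becomes iter_max). A hedged sketch of a call against this 20.00 schema, assuming that convention holds here and using a hypothetical input table:

    from teradataml import KMeans, DataFrame

    df = DataFrame("kmeans_input")  # hypothetical table with columns id, c1, c2

    out = KMeans(data=df,                            # InputTable
                 id_column="id",                     # IdColumn
                 target_columns=["c1", "c2"],        # TargetColumns
                 num_clusters=3,                     # NumClusters
                 seed=42,                            # Seed
                 threshold=0.0395,                   # StopThreshold (JSON default)
                 iter_max=10,                        # MaxIterNum (JSON default)
                 output_cluster_assignment=True,     # new in the 20.00 JSON
                 initialcentroids_method="KMEANS++") # new in the 20.00 JSON
    print(out.result)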