teradataml 20.0.0.2__py3-none-any.whl → 20.0.0.4__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +315 -2
- teradataml/__init__.py +4 -0
- teradataml/_version.py +1 -1
- teradataml/analytics/analytic_function_executor.py +95 -8
- teradataml/analytics/byom/__init__.py +1 -1
- teradataml/analytics/json_parser/metadata.py +12 -3
- teradataml/analytics/json_parser/utils.py +7 -2
- teradataml/analytics/sqle/__init__.py +5 -1
- teradataml/analytics/table_operator/__init__.py +1 -1
- teradataml/analytics/uaf/__init__.py +1 -1
- teradataml/analytics/utils.py +4 -0
- teradataml/analytics/valib.py +18 -4
- teradataml/automl/__init__.py +51 -6
- teradataml/automl/data_preparation.py +59 -35
- teradataml/automl/data_transformation.py +58 -33
- teradataml/automl/feature_engineering.py +27 -12
- teradataml/automl/model_training.py +73 -46
- teradataml/common/constants.py +88 -29
- teradataml/common/garbagecollector.py +2 -1
- teradataml/common/messagecodes.py +19 -3
- teradataml/common/messages.py +6 -1
- teradataml/common/sqlbundle.py +64 -12
- teradataml/common/utils.py +246 -47
- teradataml/common/warnings.py +11 -0
- teradataml/context/context.py +161 -27
- teradataml/data/amazon_reviews_25.csv +26 -0
- teradataml/data/byom_example.json +11 -0
- teradataml/data/dataframe_example.json +18 -2
- teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
- teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
- teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
- teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
- teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
- teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/NaiveBayes.py +1 -1
- teradataml/data/docs/sqle/docs_17_20/Shap.py +34 -6
- teradataml/data/docs/sqle/docs_17_20/TDNaiveBayesPredict.py +4 -4
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +3 -3
- teradataml/data/docs/tableoperator/docs_17_20/Image2Matrix.py +118 -0
- teradataml/data/docs/uaf/docs_17_20/CopyArt.py +145 -0
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/DickeyFuller.py +18 -21
- teradataml/data/hnsw_alter_data.csv +5 -0
- teradataml/data/hnsw_data.csv +10 -0
- teradataml/data/jsons/byom/h2opredict.json +1 -1
- teradataml/data/jsons/byom/onnxembeddings.json +266 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +1 -1
- teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +250 -0
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +266 -0
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +278 -0
- teradataml/data/jsons/storedprocedure/17.20/TD_COPYART.json +71 -0
- teradataml/data/jsons/tableoperator/17.20/IMAGE2MATRIX.json +53 -0
- teradataml/data/jsons/uaf/17.20/TD_DICKEY_FULLER.json +10 -19
- teradataml/data/jsons/uaf/17.20/TD_SAX.json +3 -1
- teradataml/data/jsons/uaf/17.20/TD_WINDOWDFFT.json +15 -5
- teradataml/data/medical_readings.csv +101 -0
- teradataml/data/patient_profile.csv +101 -0
- teradataml/data/scripts/lightgbm/dataset.template +157 -0
- teradataml/data/scripts/lightgbm/lightgbm_class_functions.template +247 -0
- teradataml/data/scripts/lightgbm/lightgbm_function.template +216 -0
- teradataml/data/scripts/lightgbm/lightgbm_sklearn.template +159 -0
- teradataml/data/scripts/sklearn/sklearn_fit.py +194 -167
- teradataml/data/scripts/sklearn/sklearn_fit_predict.py +136 -115
- teradataml/data/scripts/sklearn/sklearn_function.template +14 -19
- teradataml/data/scripts/sklearn/sklearn_model_selection_split.py +155 -137
- teradataml/data/scripts/sklearn/sklearn_transform.py +129 -42
- teradataml/data/target_udt_data.csv +8 -0
- teradataml/data/templates/open_source_ml.json +3 -2
- teradataml/data/teradataml_example.json +8 -0
- teradataml/data/vectordistance_example.json +4 -0
- teradataml/dataframe/copy_to.py +8 -3
- teradataml/dataframe/data_transfer.py +11 -1
- teradataml/dataframe/dataframe.py +1049 -285
- teradataml/dataframe/dataframe_utils.py +152 -20
- teradataml/dataframe/functions.py +578 -35
- teradataml/dataframe/setop.py +11 -6
- teradataml/dataframe/sql.py +185 -16
- teradataml/dbutils/dbutils.py +1049 -115
- teradataml/dbutils/filemgr.py +48 -1
- teradataml/hyperparameter_tuner/optimizer.py +12 -1
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/__init__.py +1 -1
- teradataml/opensource/_base.py +1466 -0
- teradataml/opensource/_class.py +464 -0
- teradataml/opensource/{sklearn/constants.py → _constants.py} +21 -14
- teradataml/opensource/_lightgbm.py +949 -0
- teradataml/opensource/_sklearn.py +1008 -0
- teradataml/opensource/{sklearn/_wrapper_utils.py → _wrapper_utils.py} +5 -6
- teradataml/options/__init__.py +54 -38
- teradataml/options/configure.py +131 -27
- teradataml/options/display.py +13 -2
- teradataml/plot/axis.py +47 -8
- teradataml/plot/figure.py +33 -0
- teradataml/plot/plot.py +63 -13
- teradataml/scriptmgmt/UserEnv.py +5 -5
- teradataml/scriptmgmt/lls_utils.py +130 -40
- teradataml/store/__init__.py +12 -0
- teradataml/store/feature_store/__init__.py +0 -0
- teradataml/store/feature_store/constants.py +291 -0
- teradataml/store/feature_store/feature_store.py +2318 -0
- teradataml/store/feature_store/models.py +1505 -0
- teradataml/table_operators/Apply.py +32 -18
- teradataml/table_operators/Script.py +3 -1
- teradataml/table_operators/TableOperator.py +3 -1
- teradataml/table_operators/query_generator.py +3 -0
- teradataml/table_operators/table_operator_query_generator.py +3 -1
- teradataml/table_operators/table_operator_util.py +37 -38
- teradataml/table_operators/templates/dataframe_register.template +69 -0
- teradataml/utils/dtypes.py +51 -2
- teradataml/utils/internal_buffer.py +18 -0
- teradataml/utils/validators.py +99 -8
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/METADATA +321 -5
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/RECORD +121 -94
- teradataml/libaed_0_1.dylib +0 -0
- teradataml/libaed_0_1.so +0 -0
- teradataml/opensource/sklearn/__init__.py +0 -1
- teradataml/opensource/sklearn/_class.py +0 -255
- teradataml/opensource/sklearn/_sklearn_wrapper.py +0 -1800
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.2.dist-info → teradataml-20.0.0.4.dist-info}/zip-safe +0 -0
|
@@ -0,0 +1,250 @@
|
|
|
1
|
+
{
|
|
2
|
+
"json_schema_major_version": "1",
|
|
3
|
+
"json_schema_minor_version": "1",
|
|
4
|
+
"json_content_version": "1",
|
|
5
|
+
"function_name": "TD_KMeans",
|
|
6
|
+
"function_version": "1.0",
|
|
7
|
+
"function_type": "fastpath",
|
|
8
|
+
"function_category": "Model Training",
|
|
9
|
+
"function_alias_name": "TD_KMeans",
|
|
10
|
+
"function_r_name": "aa.td_kmeans",
|
|
11
|
+
"short_description": "fastpath function to generate clustering model using KMeans algorithm.",
|
|
12
|
+
"long_description": "fastpath function to generate clustering model containing cluster centroids using KMeans algorithm.",
|
|
13
|
+
"input_tables": [
|
|
14
|
+
{
|
|
15
|
+
"requiredInputKind": [
|
|
16
|
+
"PartitionByAny"
|
|
17
|
+
],
|
|
18
|
+
"isOrdered": false,
|
|
19
|
+
"partitionByOne": false,
|
|
20
|
+
"name": "InputTable",
|
|
21
|
+
"alternateNames": [],
|
|
22
|
+
"isRequired": true,
|
|
23
|
+
"rDescription": "The relation that contains input data.",
|
|
24
|
+
"description": "The relation that contains input data.",
|
|
25
|
+
"datatype": "TABLE_ALIAS",
|
|
26
|
+
"allowsLists": false,
|
|
27
|
+
"rName": "data",
|
|
28
|
+
"useInR": true,
|
|
29
|
+
"rOrderNum": 1
|
|
30
|
+
},
|
|
31
|
+
{
|
|
32
|
+
"requiredInputKind": [
|
|
33
|
+
"Dimension"
|
|
34
|
+
],
|
|
35
|
+
"isOrdered": false,
|
|
36
|
+
"partitionByOne": false,
|
|
37
|
+
"name": "InitialCentroidsTable",
|
|
38
|
+
"alternateNames": [],
|
|
39
|
+
"isRequired": false,
|
|
40
|
+
"rDescription": "The relation that contains set of initial centroids.",
|
|
41
|
+
"description": "The relation that contains set of initial centroids.",
|
|
42
|
+
"datatype": "TABLE_ALIAS",
|
|
43
|
+
"allowsLists": false,
|
|
44
|
+
"rName": "centroids.table",
|
|
45
|
+
"useInR": true,
|
|
46
|
+
"rOrderNum": 2
|
|
47
|
+
}
|
|
48
|
+
],
|
|
49
|
+
"output_tables": [
|
|
50
|
+
{
|
|
51
|
+
"isOutputTable": true,
|
|
52
|
+
"omitPossible": true,
|
|
53
|
+
"name": "ModelTable",
|
|
54
|
+
"alternateNames": [],
|
|
55
|
+
"isRequired": false,
|
|
56
|
+
"rDescription": "Specifies the name of the table in which the generated KMeans model can be stored.",
|
|
57
|
+
"description": "Specifies the name of the table in which the generated KMeans model can be stored.",
|
|
58
|
+
"datatype": "TABLE_NAME",
|
|
59
|
+
"allowsLists": false,
|
|
60
|
+
"rName": "model.table",
|
|
61
|
+
"useInR": true,
|
|
62
|
+
"rOrderNum": 3
|
|
63
|
+
}
|
|
64
|
+
],
|
|
65
|
+
"argument_clauses": [
|
|
66
|
+
{
|
|
67
|
+
"targetTable": [
|
|
68
|
+
"InputTable"
|
|
69
|
+
],
|
|
70
|
+
"checkDuplicate": true,
|
|
71
|
+
"allowedTypes": [],
|
|
72
|
+
"allowedTypeGroups": [
|
|
73
|
+
"ALL"
|
|
74
|
+
],
|
|
75
|
+
"matchLengthOfArgument": "",
|
|
76
|
+
"allowPadding": false,
|
|
77
|
+
"name": "IdColumn",
|
|
78
|
+
"alternateNames": [],
|
|
79
|
+
"isRequired": true,
|
|
80
|
+
"rDescription": "Specifies the column which is unique identifier of input row.",
|
|
81
|
+
"description": "Specifies the column which is unique identifier of input row.",
|
|
82
|
+
"datatype": "COLUMNS",
|
|
83
|
+
"allowsLists": false,
|
|
84
|
+
"rName": "id.column",
|
|
85
|
+
"useInR": true,
|
|
86
|
+
"rOrderNum": 4
|
|
87
|
+
},
|
|
88
|
+
{
|
|
89
|
+
"targetTable": [
|
|
90
|
+
"InputTable"
|
|
91
|
+
],
|
|
92
|
+
"checkDuplicate": true,
|
|
93
|
+
"allowedTypes": [],
|
|
94
|
+
"allowedTypeGroups": [
|
|
95
|
+
"NUMERIC","VECTOR","BYTE","VARBYTE"
|
|
96
|
+
],
|
|
97
|
+
"matchLengthOfArgument": "",
|
|
98
|
+
"allowPadding": false,
|
|
99
|
+
"name": "TargetColumns",
|
|
100
|
+
"alternateNames": [],
|
|
101
|
+
"isRequired": true,
|
|
102
|
+
"rDescription": "Specifies the columns/features to be used to cluster the data.",
|
|
103
|
+
"description": "Specifies the columns/features to be used to cluster the data.",
|
|
104
|
+
"datatype": "COLUMNS",
|
|
105
|
+
"allowsLists": true,
|
|
106
|
+
"rName": "target.columns",
|
|
107
|
+
"useInR": true,
|
|
108
|
+
"rOrderNum": 5
|
|
109
|
+
},
|
|
110
|
+
{
|
|
111
|
+
"lowerBound": 1,
|
|
112
|
+
"upperBound": 2147483647,
|
|
113
|
+
"lowerBoundType": "EXCLUSIVE",
|
|
114
|
+
"upperBoundType": "INCLUSIVE",
|
|
115
|
+
"allowNaN": false,
|
|
116
|
+
"name": "NumClusters",
|
|
117
|
+
"alternateNames": [],
|
|
118
|
+
"isRequired": false,
|
|
119
|
+
"rDescription": "Specifies the number of clusters to be produced. This argument is not allowed with InitialCentroidsTable provided.",
|
|
120
|
+
"description": "Specifies the number of clusters to be produced. This argument is not allowed with InitialCentroidsTable provided.",
|
|
121
|
+
"datatype": "INTEGER",
|
|
122
|
+
"allowsLists": false,
|
|
123
|
+
"rName": "num.clusters",
|
|
124
|
+
"useInR": true,
|
|
125
|
+
"rOrderNum": 6
|
|
126
|
+
},
|
|
127
|
+
{
|
|
128
|
+
"lowerBound": 0,
|
|
129
|
+
"upperBound": 2147483647,
|
|
130
|
+
"lowerBoundType": "INCLUSIVE",
|
|
131
|
+
"upperBoundType": "INCLUSIVE",
|
|
132
|
+
"allowNaN": false,
|
|
133
|
+
"name": "Seed",
|
|
134
|
+
"alternateNames": [],
|
|
135
|
+
"isRequired": false,
|
|
136
|
+
"rDescription": "Specify the random seed the algorithm uses for repeatable results. The algorithm uses the seed to randomly sample the input table rows as initial clusters.",
|
|
137
|
+
"description": "Specify the random seed the algorithm uses for repeatable results. The algorithm uses the seed to randomly sample the input table rows as initial clusters.",
|
|
138
|
+
"datatype": "INTEGER",
|
|
139
|
+
"allowsLists": false,
|
|
140
|
+
"rName": "seed",
|
|
141
|
+
"useInR": true,
|
|
142
|
+
"rOrderNum": 7
|
|
143
|
+
},
|
|
144
|
+
{
|
|
145
|
+
"defaultValue": 0.0395,
|
|
146
|
+
"lowerBound": 0,
|
|
147
|
+
"upperBound": 1.797e+308,
|
|
148
|
+
"lowerBoundType": "INCLUSIVE",
|
|
149
|
+
"upperBoundType": "INCLUSIVE",
|
|
150
|
+
"allowNaN": false,
|
|
151
|
+
"name": "StopThreshold",
|
|
152
|
+
"alternateNames": [],
|
|
153
|
+
"isRequired": false,
|
|
154
|
+
"rDescription": "Specify the convergence threshold. When the centroids move by less than this amount, the algorithm has converged.",
|
|
155
|
+
"description": "Specify the convergence threshold. When the centroids move by less than this amount, the algorithm has converged.",
|
|
156
|
+
"datatype": "DOUBLE",
|
|
157
|
+
"allowsLists": false,
|
|
158
|
+
"rName": "threshold",
|
|
159
|
+
"useInR": true,
|
|
160
|
+
"rOrderNum": 8
|
|
161
|
+
},
|
|
162
|
+
{
|
|
163
|
+
"defaultValue": 10,
|
|
164
|
+
"lowerBound": 1,
|
|
165
|
+
"upperBound": 2147483647,
|
|
166
|
+
"lowerBoundType": "INCLUSIVE",
|
|
167
|
+
"upperBoundType": "INCLUSIVE",
|
|
168
|
+
"allowNaN": false,
|
|
169
|
+
"name": "MaxIterNum",
|
|
170
|
+
"alternateNames": [],
|
|
171
|
+
"isRequired": false,
|
|
172
|
+
"rDescription": "Specify the maximum number of iterations that the algorithm runs before quitting if the convergence threshold has not been met.",
|
|
173
|
+
"description": "Specify the maximum number of iterations that the algorithm runs before quitting if the convergence threshold has not been met.",
|
|
174
|
+
"datatype": "INTEGER",
|
|
175
|
+
"allowsLists": false,
|
|
176
|
+
"rName": "iter.max",
|
|
177
|
+
"useInR": true,
|
|
178
|
+
"rOrderNum": 9
|
|
179
|
+
},
|
|
180
|
+
{
|
|
181
|
+
"defaultValue": 1,
|
|
182
|
+
"lowerBound": 1,
|
|
183
|
+
"upperBound": 2147483647,
|
|
184
|
+
"lowerBoundType": "INCLUSIVE",
|
|
185
|
+
"upperBoundType": "INCLUSIVE",
|
|
186
|
+
"allowNaN": false,
|
|
187
|
+
"name": "NumInit",
|
|
188
|
+
"alternateNames": [],
|
|
189
|
+
"isRequired": false,
|
|
190
|
+
"rDescription": "The number of times the k-means algorithm will be run with different initial centroid seeds. The function will emit the model having the least value of Total Within Cluster Squared Sum.",
|
|
191
|
+
"description": "The number of times the k-means algorithm will be run with different initial centroid seeds. The function will emit the model having the least value of Total Within Cluster Squared Sum.",
|
|
192
|
+
"datatype": "INTEGER",
|
|
193
|
+
"allowsLists": false,
|
|
194
|
+
"rName": "num.init",
|
|
195
|
+
"useInR": true,
|
|
196
|
+
"rOrderNum": 10
|
|
197
|
+
},
|
|
198
|
+
{
|
|
199
|
+
"defaultValue": false,
|
|
200
|
+
"name": "OutputClusterAssignment",
|
|
201
|
+
"alternateNames": [],
|
|
202
|
+
"isRequired": false,
|
|
203
|
+
"rDescription": "Specifies whether to output Cluster Assignment.",
|
|
204
|
+
"description": "Specifies whether to output Cluster Assignment.",
|
|
205
|
+
"datatype": "BOOLEAN",
|
|
206
|
+
"allowsLists": false,
|
|
207
|
+
"rName": "output.cluster.assignment",
|
|
208
|
+
"useInR": true,
|
|
209
|
+
"rOrderNum": 11
|
|
210
|
+
},
|
|
211
|
+
{
|
|
212
|
+
"permittedValues": [
|
|
213
|
+
"RANDOM",
|
|
214
|
+
"KMEANS++"
|
|
215
|
+
],
|
|
216
|
+
"defaultValue": "RANDOM",
|
|
217
|
+
"isOutputColumn": false,
|
|
218
|
+
"matchLengthOfArgument": "",
|
|
219
|
+
"allowPadding": false,
|
|
220
|
+
"name": "InitialCentroidsMethod",
|
|
221
|
+
"alternateNames": [],
|
|
222
|
+
"isRequired": false,
|
|
223
|
+
"rDescription": "Specifies the initialization method to be used for selecting initial set of centroids.",
|
|
224
|
+
"description": "Specifies the initialization method to be used for selecting initial set of centroids.",
|
|
225
|
+
"datatype": "STRING",
|
|
226
|
+
"allowsLists": false,
|
|
227
|
+
"rName": "initialcentroids.method",
|
|
228
|
+
"useInR": true,
|
|
229
|
+
"rOrderNum": 12
|
|
230
|
+
},
|
|
231
|
+
{
|
|
232
|
+
"defaultValue": 1,
|
|
233
|
+
"lowerBound": 1,
|
|
234
|
+
"upperBound": 4096,
|
|
235
|
+
"lowerBoundType": "INCLUSIVE",
|
|
236
|
+
"upperBoundType": "INCLUSIVE",
|
|
237
|
+
"allowNaN": false,
|
|
238
|
+
"name": "EmbeddingSize",
|
|
239
|
+
"alternateNames": [],
|
|
240
|
+
"isRequired": false,
|
|
241
|
+
"rDescription": "Specify the embedding size of the vectors.",
|
|
242
|
+
"description": "Specify the embedding size of the vectors.",
|
|
243
|
+
"datatype": "INTEGER",
|
|
244
|
+
"allowsLists": false,
|
|
245
|
+
"rName": "embedding.size",
|
|
246
|
+
"useInR": true,
|
|
247
|
+
"rOrderNum": 13
|
|
248
|
+
}
|
|
249
|
+
]
|
|
250
|
+
}
|
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
{
|
|
2
|
+
"json_schema_major_version": "1",
|
|
3
|
+
"json_schema_minor_version": "2",
|
|
4
|
+
"json_content_version": "1",
|
|
5
|
+
"function_name": "TD_SMOTE",
|
|
6
|
+
"function_version": "1.0",
|
|
7
|
+
"function_type": "fastpath",
|
|
8
|
+
"function_category": "Feature Engineering Transform",
|
|
9
|
+
"function_alias_name": "TD_SMOTE",
|
|
10
|
+
"function_r_name": "aa.td_smote",
|
|
11
|
+
"ref_function_r_name": "aa.td_smote",
|
|
12
|
+
"short_description": "This function generates data by oversampling a minority class.",
|
|
13
|
+
"long_description": "This function generates data by oversampling a minority class using smote, adasyn, borderline-2 or smote-nc algorithms.",
|
|
14
|
+
"input_tables": [
|
|
15
|
+
{
|
|
16
|
+
"requiredInputKind": [
|
|
17
|
+
"PartitionByAny"
|
|
18
|
+
],
|
|
19
|
+
"isOrdered": false,
|
|
20
|
+
"partitionByOne": false,
|
|
21
|
+
"name": "InputTable",
|
|
22
|
+
"alternateNames": [],
|
|
23
|
+
"isRequired": true,
|
|
24
|
+
"rDescription": "Specifies the table containing the input data.",
|
|
25
|
+
"description": "Specifies the table containing the input data.",
|
|
26
|
+
"datatype": "TABLE_ALIAS",
|
|
27
|
+
"allowsLists": false,
|
|
28
|
+
"rName": "data",
|
|
29
|
+
"useInR": true,
|
|
30
|
+
"rOrderNum": 1
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"requiredInputKind": [
|
|
34
|
+
"Dimension"
|
|
35
|
+
],
|
|
36
|
+
|
|
37
|
+
"isOrdered": false,
|
|
38
|
+
"partitionByOne": false,
|
|
39
|
+
"name": "EncodingsTable",
|
|
40
|
+
"alternateNames": [],
|
|
41
|
+
"isRequired": false,
|
|
42
|
+
"rDescription": "Specifies the table created with TD_OrdinalEncodingFit output, using the categorical input columns as TargetColumns. Used and required only with smotenc.",
|
|
43
|
+
"description": "Specifies the table created with TD_OrdinalEncodingFit output, using the categorical input columns as TargetColumns. Used and required only with smotenc.",
|
|
44
|
+
"datatype": "TABLE_ALIAS",
|
|
45
|
+
"allowsLists": false,
|
|
46
|
+
"rName": "encoding.data",
|
|
47
|
+
"useInR": true,
|
|
48
|
+
"rOrderNum": 2
|
|
49
|
+
}
|
|
50
|
+
],
|
|
51
|
+
"argument_clauses": [
|
|
52
|
+
{
|
|
53
|
+
"targetTable": [
|
|
54
|
+
"InputTable"
|
|
55
|
+
],
|
|
56
|
+
|
|
57
|
+
"checkDuplicate": true,
|
|
58
|
+
"allowedTypes": [],
|
|
59
|
+
"allowedTypeGroups": [
|
|
60
|
+
"INTEGER"
|
|
61
|
+
],
|
|
62
|
+
|
|
63
|
+
"matchLengthOfArgument": "",
|
|
64
|
+
"allowPadding": false,
|
|
65
|
+
"name": "IDColumn",
|
|
66
|
+
"alternateNames": [],
|
|
67
|
+
"isRequired": true,
|
|
68
|
+
"rDescription": "specifies the name of the column that uniquely identifies a data sample in the input table.",
|
|
69
|
+
"description": "specifies the name of the column that uniquely identifies a data sample in the input table.",
|
|
70
|
+
"datatype": "COLUMNS",
|
|
71
|
+
"allowsLists": false,
|
|
72
|
+
"rName": "id.column",
|
|
73
|
+
"useInR": true,
|
|
74
|
+
"rOrderNum": 3
|
|
75
|
+
},
|
|
76
|
+
{
|
|
77
|
+
"targetTable": [
|
|
78
|
+
"InputTable"
|
|
79
|
+
],
|
|
80
|
+
"checkDuplicate": true,
|
|
81
|
+
"allowedTypes": [],
|
|
82
|
+
"allowedTypeGroups": [
|
|
83
|
+
"NUMERIC"
|
|
84
|
+
],
|
|
85
|
+
|
|
86
|
+
"matchLengthOfArgument": "",
|
|
87
|
+
"allowPadding": false,
|
|
88
|
+
"name": "ResponseColumn",
|
|
89
|
+
"alternateNames": [],
|
|
90
|
+
"isRequired": false,
|
|
91
|
+
"rDescription": "specifies the name of the input table column that contains the numeric value to be used as the response value for a sample.",
|
|
92
|
+
"description": "specifies the name of the input table column that contains the numeric value to be used as the response value for a sample.",
|
|
93
|
+
"datatype": "COLUMNS",
|
|
94
|
+
"allowsLists": false,
|
|
95
|
+
"rName": "response.column",
|
|
96
|
+
"useInR": true,
|
|
97
|
+
"rOrderNum": 4
|
|
98
|
+
},
|
|
99
|
+
{
|
|
100
|
+
"targetTable": [
|
|
101
|
+
"InputTable"
|
|
102
|
+
],
|
|
103
|
+
"checkDuplicate": true,
|
|
104
|
+
"allowedTypes": [],
|
|
105
|
+
"allowedTypeGroups": [
|
|
106
|
+
"NUMERIC"
|
|
107
|
+
],
|
|
108
|
+
"matchLengthOfArgument": "",
|
|
109
|
+
"allowPadding": false,
|
|
110
|
+
"name": "InputColumns",
|
|
111
|
+
"alternateNames": [],
|
|
112
|
+
"isRequired": true,
|
|
113
|
+
"rDescription": "specifies the name of the input table columns that the function uses for oversampling.",
|
|
114
|
+
"description": "specifies the name of the input table columns that the function uses for oversampling.",
|
|
115
|
+
"datatype": "COLUMNS",
|
|
116
|
+
"allowsLists": true,
|
|
117
|
+
"rName": "input.columns",
|
|
118
|
+
"useInR": true,
|
|
119
|
+
"rOrderNum": 5
|
|
120
|
+
},
|
|
121
|
+
{
|
|
122
|
+
"targetTable": [
|
|
123
|
+
"InputTable"
|
|
124
|
+
],
|
|
125
|
+
"checkDuplicate": true,
|
|
126
|
+
"allowedTypes": [],
|
|
127
|
+
"allowedTypeGroups": [
|
|
128
|
+
"STRING"
|
|
129
|
+
],
|
|
130
|
+
"matchLengthOfArgument": "",
|
|
131
|
+
"allowPadding": false,
|
|
132
|
+
"name": "CategoricalInputColumns",
|
|
133
|
+
"alternateNames": [],
|
|
134
|
+
"isRequired": false,
|
|
135
|
+
"rDescription": "specifies the name of the categorical columns in the input table that the function uses for oversampling with smotenc.",
|
|
136
|
+
"description": "specifies the name of the categorical columns in the input table that the function uses for oversampling with smotenc.",
|
|
137
|
+
"datatype": "COLUMNS",
|
|
138
|
+
"allowsLists": true,
|
|
139
|
+
"rName": "categorical.columns",
|
|
140
|
+
"useInR": true,
|
|
141
|
+
"rOrderNum": 6
|
|
142
|
+
},
|
|
143
|
+
{
|
|
144
|
+
"isOutputColumn": false,
|
|
145
|
+
"name": "MedianStandardDeviation",
|
|
146
|
+
"alternateNames": [],
|
|
147
|
+
"isRequired": false,
|
|
148
|
+
"rDescription": "specifies the median of the standard deviations computed over the numerical input columns. Required only with smotenc.",
|
|
149
|
+
"description": "specifies the median of the standard deviations computed over the numerical input columns. Required only with smotenc.",
|
|
150
|
+
"datatype": "DOUBLE",
|
|
151
|
+
"allowsLists": false,
|
|
152
|
+
"rName": "median.standard.deviation",
|
|
153
|
+
"useInR": true,
|
|
154
|
+
"rOrderNum": 7
|
|
155
|
+
},
|
|
156
|
+
{
|
|
157
|
+
"isOutputColumn": false,
|
|
158
|
+
"name": "MinorityClass",
|
|
159
|
+
"alternateNames": [],
|
|
160
|
+
"isRequired": true,
|
|
161
|
+
"rDescription": "specifies the minority class for which synthetic samples need to be generated. The label for minority class under response column must be numeric integer.",
|
|
162
|
+
"description": "specifies the minority class for which synthetic samples need to be generated. The label for minority class under response column must be numeric integer.",
|
|
163
|
+
"datatype": "STRING",
|
|
164
|
+
"allowsLists": false,
|
|
165
|
+
"rName": "minority.class",
|
|
166
|
+
"useInR": true,
|
|
167
|
+
"rOrderNum": 8
|
|
168
|
+
},
|
|
169
|
+
{
|
|
170
|
+
"isOutputColumn": false,
|
|
171
|
+
"allowNaN": false,
|
|
172
|
+
"name": "OversamplingFactor",
|
|
173
|
+
"alternateNames": [],
|
|
174
|
+
"isRequired": true,
|
|
175
|
+
"rDescription": "specifies the factor for oversampling the minority class.",
|
|
176
|
+
"description": "specifies the factor for oversampling the minority class.",
|
|
177
|
+
"datatype": "DOUBLE",
|
|
178
|
+
"allowsLists": false,
|
|
179
|
+
"rName": "oversampling.factor",
|
|
180
|
+
"useInR": true,
|
|
181
|
+
"rOrderNum": 9
|
|
182
|
+
},
|
|
183
|
+
{
|
|
184
|
+
"permittedValues": ["smote", "adasyn", "borderline","smotenc"],
|
|
185
|
+
"isOutputColumn": false,
|
|
186
|
+
"defaultValue": "smote",
|
|
187
|
+
"name": "SamplingStrategy",
|
|
188
|
+
"alternateNames": [],
|
|
189
|
+
"isRequired": false,
|
|
190
|
+
"rDescription": "specifies the oversampling algorithm to be used to create synthetic samples.",
|
|
191
|
+
"description": "specifies the oversampling algorithm to be used to create synthetic samples.",
|
|
192
|
+
"datatype": "STRING",
|
|
193
|
+
"allowsLists": false,
|
|
194
|
+
"rName": "sampling.strategy",
|
|
195
|
+
"useInR": true,
|
|
196
|
+
"rOrderNum": 10
|
|
197
|
+
},
|
|
198
|
+
{
|
|
199
|
+
"defaultValue": true,
|
|
200
|
+
"isOutputColumn": false,
|
|
201
|
+
"name": "FillSampleID",
|
|
202
|
+
"isRequired": false,
|
|
203
|
+
"rDescription": "A boolean flag which specifies whether the function should write out the id of the observation used to generate the corresponding new synthetic observations.",
|
|
204
|
+
"description": "A boolean flag which specifies whether the function should write out the id of the observation used to generate the corresponding new synthetic observations.",
|
|
205
|
+
"datatype": "BOOLEAN",
|
|
206
|
+
"allowsLists": false,
|
|
207
|
+
"rName": "fill.sampleid",
|
|
208
|
+
"useInR": true,
|
|
209
|
+
"rOrderNum": 11
|
|
210
|
+
},
|
|
211
|
+
{
|
|
212
|
+
"permittedValues": ["sample", "neighbor", "null"],
|
|
213
|
+
"isOutputColumn": false,
|
|
214
|
+
"defaultValue": "sample",
|
|
215
|
+
"name": "ValueForNonInputColumns",
|
|
216
|
+
"alternateNames": [],
|
|
217
|
+
"isRequired": false,
|
|
218
|
+
"rDescription": "Specifies the value to put in a sample column for columns not specified as input columns.",
|
|
219
|
+
"description": "Specifies the value to put in a sample column for columns not specified as input columns.",
|
|
220
|
+
"datatype": "STRING",
|
|
221
|
+
"allowsLists": false,
|
|
222
|
+
"rName": "noninput.columns.value",
|
|
223
|
+
"useInR": true,
|
|
224
|
+
"rOrderNum": 12
|
|
225
|
+
},
|
|
226
|
+
{
|
|
227
|
+
"defaultValue": 5,
|
|
228
|
+
"lowerBound": 2,
|
|
229
|
+
"upperBound": 100,
|
|
230
|
+
"lowerBoundType": "INCLUSIVE",
|
|
231
|
+
"upperBoundType": "INCLUSIVE",
|
|
232
|
+
"allowNaN": false,
|
|
233
|
+
"isOutputColumn": false,
|
|
234
|
+
"matchLengthOfArgument": "",
|
|
235
|
+
"allowPadding": false,
|
|
236
|
+
"name": "NumberOfNeighbors",
|
|
237
|
+
"alternateNames": [],
|
|
238
|
+
"isRequired": false,
|
|
239
|
+
"rDescription": "Specifies the number of nearest neighbors for choosing the sample to be used in oversampling.",
|
|
240
|
+
"description": "Specifies the number of nearest neighbors for choosing the sample to be used in oversampling.",
|
|
241
|
+
"datatype": "INTEGER",
|
|
242
|
+
"allowsLists": false,
|
|
243
|
+
"rName": "n.neighbors",
|
|
244
|
+
"useInR": true,
|
|
245
|
+
"rOrderNum": 13
|
|
246
|
+
},
|
|
247
|
+
{
|
|
248
|
+
"lowerBound": 0,
|
|
249
|
+
"upperBound": 186006,
|
|
250
|
+
"lowerBoundType": "INCLUSIVE",
|
|
251
|
+
"upperBoundType": "INCLUSIVE",
|
|
252
|
+
"allowNaN": false,
|
|
253
|
+
"name": "Seed",
|
|
254
|
+
"alternateNames": [],
|
|
255
|
+
"isRequired": false,
|
|
256
|
+
"rDescription": "Specify the random seed the algorithm uses for repeatable results. The function uses the seed for random interpolation and to generate the synthetic sample.",
|
|
257
|
+
"description": "Specify the random seed the algorithm uses for repeatable results. The function uses the seed for random interpolation and to generate the synthetic sample.",
|
|
258
|
+
"datatype": "INTEGER",
|
|
259
|
+
"allowsLists": false,
|
|
260
|
+
"rName": "seed",
|
|
261
|
+
"useInR": true,
|
|
262
|
+
"rOrderNum": 14
|
|
263
|
+
}
|
|
264
|
+
]
|
|
265
|
+
}
|
|
266
|
+
|