teradataml 20.0.0.3__py3-none-any.whl → 20.0.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +193 -1
- teradataml/__init__.py +2 -1
- teradataml/_version.py +2 -2
- teradataml/analytics/analytic_function_executor.py +25 -18
- teradataml/analytics/byom/__init__.py +1 -1
- teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
- teradataml/analytics/sqle/__init__.py +20 -2
- teradataml/analytics/utils.py +15 -1
- teradataml/analytics/valib.py +18 -4
- teradataml/automl/__init__.py +341 -112
- teradataml/automl/autodataprep/__init__.py +471 -0
- teradataml/automl/data_preparation.py +84 -42
- teradataml/automl/data_transformation.py +69 -33
- teradataml/automl/feature_engineering.py +76 -9
- teradataml/automl/feature_exploration.py +639 -25
- teradataml/automl/model_training.py +35 -14
- teradataml/clients/auth_client.py +2 -2
- teradataml/common/__init__.py +1 -2
- teradataml/common/constants.py +122 -63
- teradataml/common/messagecodes.py +14 -3
- teradataml/common/messages.py +8 -4
- teradataml/common/sqlbundle.py +40 -10
- teradataml/common/utils.py +366 -74
- teradataml/common/warnings.py +11 -0
- teradataml/context/context.py +348 -86
- teradataml/data/amazon_reviews_25.csv +26 -0
- teradataml/data/apriori_example.json +22 -0
- teradataml/data/byom_example.json +11 -0
- teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
- teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
- teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
- teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
- teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
- teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
- teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
- teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
- teradataml/data/docs/sqle/docs_17_20/Shap.py +28 -6
- teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
- teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
- teradataml/data/hnsw_alter_data.csv +5 -0
- teradataml/data/hnsw_data.csv +10 -0
- teradataml/data/jsons/byom/h2opredict.json +1 -1
- teradataml/data/jsons/byom/onnxembeddings.json +266 -0
- teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
- teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
- teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
- teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
- teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
- teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
- teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
- teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
- teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +2 -2
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +3 -3
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +6 -6
- teradataml/data/ner_dict.csv +8 -0
- teradataml/data/ner_input_eng.csv +7 -0
- teradataml/data/ner_rule.csv +5 -0
- teradataml/data/pos_input.csv +40 -0
- teradataml/data/tdnerextractor_example.json +14 -0
- teradataml/data/teradataml_example.json +21 -0
- teradataml/data/textmorph_example.json +5 -0
- teradataml/data/to_num_data.csv +4 -0
- teradataml/data/tochar_data.csv +5 -0
- teradataml/data/trans_dense.csv +16 -0
- teradataml/data/trans_sparse.csv +55 -0
- teradataml/data/vectordistance_example.json +1 -1
- teradataml/dataframe/copy_to.py +45 -29
- teradataml/dataframe/data_transfer.py +72 -46
- teradataml/dataframe/dataframe.py +642 -166
- teradataml/dataframe/dataframe_utils.py +167 -22
- teradataml/dataframe/functions.py +135 -20
- teradataml/dataframe/setop.py +11 -6
- teradataml/dataframe/sql.py +330 -78
- teradataml/dbutils/dbutils.py +556 -140
- teradataml/dbutils/filemgr.py +14 -10
- teradataml/hyperparameter_tuner/optimizer.py +12 -1
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/{sklearn/_sklearn_wrapper.py → _base.py} +168 -1013
- teradataml/opensource/_class.py +141 -17
- teradataml/opensource/{constants.py → _constants.py} +7 -3
- teradataml/opensource/_lightgbm.py +52 -53
- teradataml/opensource/_sklearn.py +1008 -0
- teradataml/opensource/_wrapper_utils.py +5 -5
- teradataml/options/__init__.py +47 -15
- teradataml/options/configure.py +103 -26
- teradataml/options/display.py +13 -2
- teradataml/plot/axis.py +47 -8
- teradataml/plot/figure.py +33 -0
- teradataml/plot/plot.py +63 -13
- teradataml/scriptmgmt/UserEnv.py +307 -40
- teradataml/scriptmgmt/lls_utils.py +428 -145
- teradataml/store/__init__.py +2 -3
- teradataml/store/feature_store/feature_store.py +102 -7
- teradataml/table_operators/Apply.py +48 -19
- teradataml/table_operators/Script.py +23 -2
- teradataml/table_operators/TableOperator.py +3 -1
- teradataml/table_operators/table_operator_util.py +58 -9
- teradataml/utils/dtypes.py +49 -1
- teradataml/utils/internal_buffer.py +38 -0
- teradataml/utils/validators.py +377 -62
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/METADATA +200 -4
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/RECORD +146 -112
- teradataml/data/SQL_Fundamentals.pdf +0 -0
- teradataml/libaed_0_1.dylib +0 -0
- teradataml/libaed_0_1.so +0 -0
- teradataml/opensource/sklearn/__init__.py +0 -0
- teradataml/store/vector_store/__init__.py +0 -1586
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/zip-safe +0 -0
|
@@ -0,0 +1,267 @@
|
|
|
1
|
+
{
|
|
2
|
+
"json_schema_major_version": "1",
|
|
3
|
+
"json_schema_minor_version": "2",
|
|
4
|
+
"json_content_version": "1",
|
|
5
|
+
"function_name": "TD_SMOTE",
|
|
6
|
+
"function_version": "1.0",
|
|
7
|
+
"function_type": "fastpath",
|
|
8
|
+
"function_category": "Feature Engineering Transform",
|
|
9
|
+
"function_alias_name": "TD_SMOTE",
|
|
10
|
+
"function_r_name": "aa.td_smote",
|
|
11
|
+
"ref_function_r_name": "aa.td_smote",
|
|
12
|
+
"short_description": "This function generates data by oversampling a minority class.",
|
|
13
|
+
"long_description": "This function generates data by oversampling a minority class using smote, adasyn, borderline-2 or smote-nc algorithms.",
|
|
14
|
+
"input_tables": [
|
|
15
|
+
{
|
|
16
|
+
"requiredInputKind": [
|
|
17
|
+
"PartitionByAny"
|
|
18
|
+
],
|
|
19
|
+
"isOrdered": false,
|
|
20
|
+
"partitionByOne": false,
|
|
21
|
+
"name": "InputTable",
|
|
22
|
+
"alternateNames": [],
|
|
23
|
+
"isRequired": true,
|
|
24
|
+
"rDescription": "Specifies the table containing the input data.",
|
|
25
|
+
"description": "Specifies the table containing the input data.",
|
|
26
|
+
"datatype": "TABLE_ALIAS",
|
|
27
|
+
"allowsLists": false,
|
|
28
|
+
"rName": "data",
|
|
29
|
+
"useInR": true,
|
|
30
|
+
"rOrderNum": 1
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"requiredInputKind": [
|
|
34
|
+
"Dimension"
|
|
35
|
+
],
|
|
36
|
+
|
|
37
|
+
"isOrdered": false,
|
|
38
|
+
"partitionByOne": false,
|
|
39
|
+
"name": "EncodingsTable",
|
|
40
|
+
"alternateNames": [],
|
|
41
|
+
"isRequired": false,
|
|
42
|
+
"rDescription": "Specifies the table created with TD_OrdinalEncodingFit output, using the categorical input columns as TargetColumns. Used and required only with smotenc.",
|
|
43
|
+
"description": "Specifies the table created with TD_OrdinalEncodingFit output, using the categorical input columns as TargetColumns. Used and required only with smotenc.",
|
|
44
|
+
"datatype": "TABLE_ALIAS",
|
|
45
|
+
"allowsLists": false,
|
|
46
|
+
"rName": "encoding.data",
|
|
47
|
+
"useInR": true,
|
|
48
|
+
"rOrderNum": 2
|
|
49
|
+
}
|
|
50
|
+
],
|
|
51
|
+
"argument_clauses": [
|
|
52
|
+
{
|
|
53
|
+
"targetTable": [
|
|
54
|
+
"InputTable"
|
|
55
|
+
],
|
|
56
|
+
|
|
57
|
+
"checkDuplicate": true,
|
|
58
|
+
"allowedTypes": [],
|
|
59
|
+
"allowedTypeGroups": [
|
|
60
|
+
"INTEGER"
|
|
61
|
+
],
|
|
62
|
+
|
|
63
|
+
"matchLengthOfArgument": "",
|
|
64
|
+
"allowPadding": false,
|
|
65
|
+
"name": "IDColumn",
|
|
66
|
+
"alternateNames": [],
|
|
67
|
+
"isRequired": true,
|
|
68
|
+
"rDescription": "specifies the name of the column that uniquely identifies a data sample in the input table.",
|
|
69
|
+
"description": "specifies the name of the column that uniquely identifies a data sample in the input table.",
|
|
70
|
+
"datatype": "COLUMNS",
|
|
71
|
+
"allowsLists": false,
|
|
72
|
+
"rName": "id.column",
|
|
73
|
+
"useInR": true,
|
|
74
|
+
"rOrderNum": 3
|
|
75
|
+
},
|
|
76
|
+
{
|
|
77
|
+
"targetTable": [
|
|
78
|
+
"InputTable"
|
|
79
|
+
],
|
|
80
|
+
"checkDuplicate": true,
|
|
81
|
+
"allowedTypes": [],
|
|
82
|
+
"allowedTypeGroups": [
|
|
83
|
+
"NUMERIC"
|
|
84
|
+
],
|
|
85
|
+
|
|
86
|
+
"matchLengthOfArgument": "",
|
|
87
|
+
"allowPadding": false,
|
|
88
|
+
"name": "ResponseColumn",
|
|
89
|
+
"alternateNames": [],
|
|
90
|
+
"isRequired": false,
|
|
91
|
+
"rDescription": "specifies the name of the input table column that contains the numeric value to be used as the response value for a sample.",
|
|
92
|
+
"description": "specifies the name of the input table column that contains the numeric value to be used as the response value for a sample.",
|
|
93
|
+
"datatype": "COLUMNS",
|
|
94
|
+
"allowsLists": false,
|
|
95
|
+
"rName": "response.column",
|
|
96
|
+
"useInR": true,
|
|
97
|
+
"rOrderNum": 4
|
|
98
|
+
},
|
|
99
|
+
{
|
|
100
|
+
"targetTable": [
|
|
101
|
+
"InputTable"
|
|
102
|
+
],
|
|
103
|
+
"checkDuplicate": true,
|
|
104
|
+
"allowedTypes": [],
|
|
105
|
+
"allowedTypeGroups": [
|
|
106
|
+
"NUMERIC"
|
|
107
|
+
],
|
|
108
|
+
"matchLengthOfArgument": "",
|
|
109
|
+
"allowPadding": false,
|
|
110
|
+
"name": "InputColumns",
|
|
111
|
+
"alternateNames": [],
|
|
112
|
+
"isRequired": true,
|
|
113
|
+
"rDescription": "specifies the name of the input table columns that the function uses for oversampling.",
|
|
114
|
+
"description": "specifies the name of the input table columns that the function uses for oversampling.",
|
|
115
|
+
"datatype": "COLUMNS",
|
|
116
|
+
"allowsLists": true,
|
|
117
|
+
"rName": "input.columns",
|
|
118
|
+
"useInR": true,
|
|
119
|
+
"rOrderNum": 5
|
|
120
|
+
},
|
|
121
|
+
{
|
|
122
|
+
"targetTable": [
|
|
123
|
+
"InputTable"
|
|
124
|
+
],
|
|
125
|
+
"checkDuplicate": true,
|
|
126
|
+
"allowedTypes": [],
|
|
127
|
+
"allowedTypeGroups": [
|
|
128
|
+
"STRING"
|
|
129
|
+
],
|
|
130
|
+
"matchLengthOfArgument": "",
|
|
131
|
+
"allowPadding": false,
|
|
132
|
+
"name": "CategoricalInputColumns",
|
|
133
|
+
"alternateNames": [],
|
|
134
|
+
"isRequired": false,
|
|
135
|
+
"rDescription": "specifies the name of the categorical columns in the input table that the function uses for oversampling with smotenc.",
|
|
136
|
+
"description": "specifies the name of the categorical columns in the input table that the function uses for oversampling with smotenc.",
|
|
137
|
+
"datatype": "COLUMNS",
|
|
138
|
+
"allowsLists": true,
|
|
139
|
+
"rName": "categorical.columns",
|
|
140
|
+
"useInR": true,
|
|
141
|
+
"rOrderNum": 6
|
|
142
|
+
},
|
|
143
|
+
{
|
|
144
|
+
"isOutputColumn": false,
|
|
145
|
+
"name": "MedianStandardDeviation",
|
|
146
|
+
"alternateNames": [],
|
|
147
|
+
"isRequired": false,
|
|
148
|
+
"rDescription": "specifies the median of the standard deviations computed over the numerical input columns. Required only with smotenc.",
|
|
149
|
+
"description": "specifies the median of the standard deviations computed over the numerical input columns. Required only with smotenc.",
|
|
150
|
+
"datatype": "DOUBLE",
|
|
151
|
+
"allowsLists": false,
|
|
152
|
+
"rName": "median.standard.deviation",
|
|
153
|
+
"useInR": true,
|
|
154
|
+
"rOrderNum": 7
|
|
155
|
+
},
|
|
156
|
+
{
|
|
157
|
+
"isOutputColumn": false,
|
|
158
|
+
"name": "MinorityClass",
|
|
159
|
+
"alternateNames": [],
|
|
160
|
+
"isRequired": true,
|
|
161
|
+
"rDescription": "specifies the minority class for which synthetic samples need to be generated. The label for minority class under response column must be numeric integer.",
|
|
162
|
+
"description": "specifies the minority class for which synthetic samples need to be generated. The label for minority class under response column must be numeric integer.",
|
|
163
|
+
"datatype": "STRING",
|
|
164
|
+
"allowsLists": false,
|
|
165
|
+
"rName": "minority.class",
|
|
166
|
+
"useInR": true,
|
|
167
|
+
"rOrderNum": 8
|
|
168
|
+
},
|
|
169
|
+
{
|
|
170
|
+
"isOutputColumn": false,
|
|
171
|
+
"defaultValue":5,
|
|
172
|
+
"allowNaN": false,
|
|
173
|
+
"name": "OversamplingFactor",
|
|
174
|
+
"alternateNames": [],
|
|
175
|
+
"isRequired": true,
|
|
176
|
+
"rDescription": "specifies the factor for oversampling the minority class.",
|
|
177
|
+
"description": "specifies the factor for oversampling the minority class.",
|
|
178
|
+
"datatype": "DOUBLE",
|
|
179
|
+
"allowsLists": false,
|
|
180
|
+
"rName": "oversampling.factor",
|
|
181
|
+
"useInR": true,
|
|
182
|
+
"rOrderNum": 9
|
|
183
|
+
},
|
|
184
|
+
{
|
|
185
|
+
"permittedValues": ["smote", "adasyn", "borderline","smotenc"],
|
|
186
|
+
"isOutputColumn": false,
|
|
187
|
+
"defaultValue": "smote",
|
|
188
|
+
"name": "SamplingStrategy",
|
|
189
|
+
"alternateNames": [],
|
|
190
|
+
"isRequired": false,
|
|
191
|
+
"rDescription": "specifies the oversampling algorithm to be used to create synthetic samples.",
|
|
192
|
+
"description": "specifies the oversampling algorithm to be used to create synthetic samples.",
|
|
193
|
+
"datatype": "STRING",
|
|
194
|
+
"allowsLists": false,
|
|
195
|
+
"rName": "sampling.strategy",
|
|
196
|
+
"useInR": true,
|
|
197
|
+
"rOrderNum": 10
|
|
198
|
+
},
|
|
199
|
+
{
|
|
200
|
+
"defaultValue": true,
|
|
201
|
+
"isOutputColumn": false,
|
|
202
|
+
"name": "FillSampleID",
|
|
203
|
+
"isRequired": false,
|
|
204
|
+
"rDescription": "A boolean flag which specifies whether the function should write out the id of the observation used to generate the corresponding new synthetic observations.",
|
|
205
|
+
"description": "A boolean flag which specifies whether the function should write out the id of the observation used to generate the corresponding new synthetic observations.",
|
|
206
|
+
"datatype": "BOOLEAN",
|
|
207
|
+
"allowsLists": false,
|
|
208
|
+
"rName": "fill.sampleid",
|
|
209
|
+
"useInR": true,
|
|
210
|
+
"rOrderNum": 11
|
|
211
|
+
},
|
|
212
|
+
{
|
|
213
|
+
"permittedValues": ["sample", "neighbor", "null"],
|
|
214
|
+
"isOutputColumn": false,
|
|
215
|
+
"defaultValue": "sample",
|
|
216
|
+
"name": "ValueForNonInputColumns",
|
|
217
|
+
"alternateNames": [],
|
|
218
|
+
"isRequired": false,
|
|
219
|
+
"rDescription": "Specifies the value to put in a sample column for columns not specified as input columns.",
|
|
220
|
+
"description": "Specifies the value to put in a sample column for columns not specified as input columns.",
|
|
221
|
+
"datatype": "STRING",
|
|
222
|
+
"allowsLists": false,
|
|
223
|
+
"rName": "noninput.columns.value",
|
|
224
|
+
"useInR": true,
|
|
225
|
+
"rOrderNum": 12
|
|
226
|
+
},
|
|
227
|
+
{
|
|
228
|
+
"defaultValue": 5,
|
|
229
|
+
"lowerBound": 2,
|
|
230
|
+
"upperBound": 100,
|
|
231
|
+
"lowerBoundType": "INCLUSIVE",
|
|
232
|
+
"upperBoundType": "INCLUSIVE",
|
|
233
|
+
"allowNaN": false,
|
|
234
|
+
"isOutputColumn": false,
|
|
235
|
+
"matchLengthOfArgument": "",
|
|
236
|
+
"allowPadding": false,
|
|
237
|
+
"name": "NumberOfNeighbors",
|
|
238
|
+
"alternateNames": [],
|
|
239
|
+
"isRequired": false,
|
|
240
|
+
"rDescription": "Specifies the number of nearest neighbors for choosing the sample to be used in oversampling.",
|
|
241
|
+
"description": "Specifies the number of nearest neighbors for choosing the sample to be used in oversampling.",
|
|
242
|
+
"datatype": "INTEGER",
|
|
243
|
+
"allowsLists": false,
|
|
244
|
+
"rName": "n.neighbors",
|
|
245
|
+
"useInR": true,
|
|
246
|
+
"rOrderNum": 13
|
|
247
|
+
},
|
|
248
|
+
{
|
|
249
|
+
"lowerBound": 0,
|
|
250
|
+
"upperBound": 186006,
|
|
251
|
+
"lowerBoundType": "INCLUSIVE",
|
|
252
|
+
"upperBoundType": "INCLUSIVE",
|
|
253
|
+
"allowNaN": false,
|
|
254
|
+
"name": "Seed",
|
|
255
|
+
"alternateNames": [],
|
|
256
|
+
"isRequired": false,
|
|
257
|
+
"rDescription": "Specify the random seed the algorithm uses for repeatable results. The function uses the seed for random interpolation and to generate the synthetic sample.",
|
|
258
|
+
"description": "Specify the random seed the algorithm uses for repeatable results. The function uses the seed for random interpolation and to generate the synthetic sample.",
|
|
259
|
+
"datatype": "INTEGER",
|
|
260
|
+
"allowsLists": false,
|
|
261
|
+
"rName": "seed",
|
|
262
|
+
"useInR": true,
|
|
263
|
+
"rOrderNum": 14
|
|
264
|
+
}
|
|
265
|
+
]
|
|
266
|
+
}
|
|
267
|
+
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
{
|
|
2
|
+
"json_schema_major_version": "1",
|
|
3
|
+
"json_schema_minor_version": "1",
|
|
4
|
+
"json_content_version": "1",
|
|
5
|
+
"function_name": "TD_TextMorph",
|
|
6
|
+
"function_version": "1.0",
|
|
7
|
+
"function_type": "fastpath",
|
|
8
|
+
"function_category": "Text Analysis",
|
|
9
|
+
"function_alias_name": "TD_TextMorph",
|
|
10
|
+
"function_r_name": "td.textmorph",
|
|
11
|
+
"short_description": "This function is used to generate morphs of the given words in the input dataset.",
|
|
12
|
+
"long_description": "This function is used to generate morphs of the given words in the input dataset.",
|
|
13
|
+
"input_tables": [
|
|
14
|
+
{
|
|
15
|
+
"requiredInputKind": [
|
|
16
|
+
"PartitionByAny"
|
|
17
|
+
],
|
|
18
|
+
"isOrdered": false,
|
|
19
|
+
"partitionByOne": false,
|
|
20
|
+
"name": "InputTable",
|
|
21
|
+
"alternateNames": [],
|
|
22
|
+
"isRequired": true,
|
|
23
|
+
"rDescription": "The table that contains the input dataset.",
|
|
24
|
+
"description": "The table that contains the input dataset.",
|
|
25
|
+
"datatype": "TABLE_ALIAS",
|
|
26
|
+
"allowsLists": false,
|
|
27
|
+
"rName": "data",
|
|
28
|
+
"useInR": true,
|
|
29
|
+
"rOrderNum": 1
|
|
30
|
+
}
|
|
31
|
+
],
|
|
32
|
+
"argument_clauses": [
|
|
33
|
+
{
|
|
34
|
+
"targetTable": [
|
|
35
|
+
"InputTable"
|
|
36
|
+
],
|
|
37
|
+
"checkDuplicate": true,
|
|
38
|
+
"allowedTypes": [],
|
|
39
|
+
"allowedTypeGroups": [
|
|
40
|
+
"STRING"
|
|
41
|
+
],
|
|
42
|
+
"matchLengthOfArgument": "",
|
|
43
|
+
"allowPadding": false,
|
|
44
|
+
"name": "WordColumn",
|
|
45
|
+
"alternateNames": [],
|
|
46
|
+
"isRequired": true,
|
|
47
|
+
"rDescription": "Specify the name of the input column that contains the words for which morphs are to be generated.",
|
|
48
|
+
"description": "Specify the name of the input column that contains the words for which morphs are to be generated.",
|
|
49
|
+
"datatype": "COLUMNS",
|
|
50
|
+
"allowsLists": false,
|
|
51
|
+
"rName": "word.column",
|
|
52
|
+
"useInR": true,
|
|
53
|
+
"rOrderNum": 2
|
|
54
|
+
},
|
|
55
|
+
{
|
|
56
|
+
"permittedValues": [
|
|
57
|
+
"NOUN",
|
|
58
|
+
"VERB",
|
|
59
|
+
"ADV",
|
|
60
|
+
"ADJ"
|
|
61
|
+
],
|
|
62
|
+
"isOutputColumn": false,
|
|
63
|
+
"matchLengthOfArgument": "",
|
|
64
|
+
"allowPadding": false,
|
|
65
|
+
"name": "POS",
|
|
66
|
+
"alternateNames": [],
|
|
67
|
+
"isRequired": false,
|
|
68
|
+
"rDescription": "Specifies the part of speech to output.",
|
|
69
|
+
"description": "Specifies the part of speech to output.",
|
|
70
|
+
"datatype": "STRING",
|
|
71
|
+
"allowsLists": true,
|
|
72
|
+
"rName": "pos",
|
|
73
|
+
"useInR": true,
|
|
74
|
+
"rOrderNum": 3
|
|
75
|
+
},
|
|
76
|
+
{
|
|
77
|
+
"defaultValue": false,
|
|
78
|
+
"name": "SingleOutput",
|
|
79
|
+
"alternateNames": [],
|
|
80
|
+
"isRequired": false,
|
|
81
|
+
"rDescription": " Specify whether to output only one morph for each word. If user specifies 'false', the function outputs all morphs for each word.",
|
|
82
|
+
"description": " Specify whether to output only one morph for each word. If user specifies 'false', the function outputs all morphs for each word.",
|
|
83
|
+
"datatype": "BOOLEAN",
|
|
84
|
+
"allowsLists": false,
|
|
85
|
+
"rName": "single.output",
|
|
86
|
+
"useInR": true,
|
|
87
|
+
"rOrderNum": 4
|
|
88
|
+
},
|
|
89
|
+
{
|
|
90
|
+
"targetTable": [
|
|
91
|
+
"InputTable"
|
|
92
|
+
],
|
|
93
|
+
"checkDuplicate": true,
|
|
94
|
+
"allowedTypes": [],
|
|
95
|
+
"allowedTypeGroups": [
|
|
96
|
+
"STRING"
|
|
97
|
+
],
|
|
98
|
+
"matchLengthOfArgument": "",
|
|
99
|
+
"allowPadding": false,
|
|
100
|
+
"name": "POSTagColumn",
|
|
101
|
+
"alternateNames": [],
|
|
102
|
+
"isRequired": false,
|
|
103
|
+
"rDescription": "Specify the name of the input table column that contains the part-of-speech (POS) tags of the words, output by the function TD_POSTagger.",
|
|
104
|
+
"description": "Specify the name of the input table column that contains the part-of-speech (POS) tags of the words, output by the function TD_POSTagger.",
|
|
105
|
+
"datatype": "COLUMNS",
|
|
106
|
+
"allowsLists": false,
|
|
107
|
+
"rName": "postag.column",
|
|
108
|
+
"useInR": true,
|
|
109
|
+
"rOrderNum": 5
|
|
110
|
+
},
|
|
111
|
+
{
|
|
112
|
+
"targetTable": [
|
|
113
|
+
"InputTable"
|
|
114
|
+
],
|
|
115
|
+
"checkDuplicate": true,
|
|
116
|
+
"allowedTypes": [],
|
|
117
|
+
"allowedTypeGroups": [
|
|
118
|
+
"ALL"
|
|
119
|
+
],
|
|
120
|
+
"matchLengthOfArgument": "",
|
|
121
|
+
"allowPadding": false,
|
|
122
|
+
"name": "Accumulate",
|
|
123
|
+
"alternateNames": [],
|
|
124
|
+
"isRequired": false,
|
|
125
|
+
"rDescription": "Specifies the names of the input columns to copy to the output table.",
|
|
126
|
+
"description": "Specifies the names of the input columns to copy to the output table.",
|
|
127
|
+
"datatype": "COLUMNS",
|
|
128
|
+
"allowsLists": true,
|
|
129
|
+
"rName": "accumulate",
|
|
130
|
+
"useInR": true,
|
|
131
|
+
"rOrderNum": 6
|
|
132
|
+
}
|
|
133
|
+
]
|
|
134
|
+
}
|
|
@@ -67,6 +67,23 @@
|
|
|
67
67
|
"useInR": true,
|
|
68
68
|
"rOrderNum": 3
|
|
69
69
|
},
|
|
70
|
+
{
|
|
71
|
+
"defaultValue": false,
|
|
72
|
+
"permittedValues": [],
|
|
73
|
+
"isOutputColumn": false,
|
|
74
|
+
"matchLengthOfArgument": "",
|
|
75
|
+
"allowPadding": false,
|
|
76
|
+
"name": "EnforceTokenLimit",
|
|
77
|
+
"alternateNames": [],
|
|
78
|
+
"isRequired": false,
|
|
79
|
+
"rDescription": "Specifies whether to throw an informative error when a token larger than 64K/32K is found, or to silently discard such tokens.",
|
|
80
|
+
"description": "Specifies whether to throw an informative error when a token larger than 64K/32K is found, or to silently discard such tokens.",
|
|
81
|
+
"datatype": "BOOLEAN",
|
|
82
|
+
"allowsLists": false,
|
|
83
|
+
"rName": "enforce.token.limit",
|
|
84
|
+
"useInR": true,
|
|
85
|
+
"rOrderNum": 4
|
|
86
|
+
},
|
|
70
87
|
{
|
|
71
88
|
"defaultValue": true,
|
|
72
89
|
"permittedValues": [],
|
|
@@ -82,7 +99,7 @@
|
|
|
82
99
|
"allowsLists": false,
|
|
83
100
|
"rName": "convert.to.lowercase",
|
|
84
101
|
"useInR": true,
|
|
85
|
-
"rOrderNum":
|
|
102
|
+
"rOrderNum": 5
|
|
86
103
|
},
|
|
87
104
|
{
|
|
88
105
|
"defaultValue": false,
|
|
@@ -99,7 +116,7 @@
|
|
|
99
116
|
"allowsLists": false,
|
|
100
117
|
"rName": "stem.tokens",
|
|
101
118
|
"useInR": true,
|
|
102
|
-
"rOrderNum":
|
|
119
|
+
"rOrderNum": 6
|
|
103
120
|
},
|
|
104
121
|
{
|
|
105
122
|
"defaultValue": false,
|
|
@@ -116,7 +133,7 @@
|
|
|
116
133
|
"allowsLists": false,
|
|
117
134
|
"rName": "remove.stopwords",
|
|
118
135
|
"useInR": true,
|
|
119
|
-
"rOrderNum":
|
|
136
|
+
"rOrderNum": 7
|
|
120
137
|
},
|
|
121
138
|
{
|
|
122
139
|
"targetTable": [
|
|
@@ -138,7 +155,7 @@
|
|
|
138
155
|
"allowsLists": true,
|
|
139
156
|
"rName": "accumulate",
|
|
140
157
|
"useInR": true,
|
|
141
|
-
"rOrderNum":
|
|
158
|
+
"rOrderNum": 8
|
|
142
159
|
},
|
|
143
160
|
{
|
|
144
161
|
"defaultValue": " \\t\\n\\f\\r",
|
|
@@ -154,8 +171,23 @@
|
|
|
154
171
|
"allowsLists": false,
|
|
155
172
|
"rName": "delimiter",
|
|
156
173
|
"useInR": true,
|
|
157
|
-
"rOrderNum":
|
|
174
|
+
"rOrderNum": 9
|
|
158
175
|
},
|
|
176
|
+
{
|
|
177
|
+
"isOutputColumn": false,
|
|
178
|
+
"matchLengthOfArgument": "",
|
|
179
|
+
"allowPadding": false,
|
|
180
|
+
"name": "DelimiterRegex",
|
|
181
|
+
"alternateNames": [],
|
|
182
|
+
"isRequired": false,
|
|
183
|
+
"rDescription": "Specifies a PCRE regular expression that represents the word delimiter.",
|
|
184
|
+
"description": "Specifies a PCRE regular expression that represents the word delimiter.",
|
|
185
|
+
"datatype": "STRING",
|
|
186
|
+
"allowsLists": false,
|
|
187
|
+
"rName": "delimiter.regex",
|
|
188
|
+
"useInR": true,
|
|
189
|
+
"rOrderNum": 10
|
|
190
|
+
},
|
|
159
191
|
{
|
|
160
192
|
"defaultValue": "!#$%&()*+,-./:;?@\\^_`{|}~",
|
|
161
193
|
"isOutputColumn": false,
|
|
@@ -170,7 +202,7 @@
|
|
|
170
202
|
"allowsLists": false,
|
|
171
203
|
"rName": "punctuation",
|
|
172
204
|
"useInR": true,
|
|
173
|
-
"rOrderNum":
|
|
205
|
+
"rOrderNum": 11
|
|
174
206
|
},
|
|
175
207
|
{
|
|
176
208
|
"defaultValue": "token",
|
|
@@ -186,7 +218,80 @@
|
|
|
186
218
|
"allowsLists": false,
|
|
187
219
|
"rName": "token.col.name",
|
|
188
220
|
"useInR": true,
|
|
189
|
-
"rOrderNum":
|
|
190
|
-
}
|
|
221
|
+
"rOrderNum": 12
|
|
222
|
+
},
|
|
223
|
+
{
|
|
224
|
+
"targetTable": [
|
|
225
|
+
"InputTable"
|
|
226
|
+
],
|
|
227
|
+
"checkDuplicate": true,
|
|
228
|
+
"allowedTypes": [],
|
|
229
|
+
"allowedTypeGroups": [
|
|
230
|
+
"INTEGER"
|
|
231
|
+
],
|
|
232
|
+
"matchLengthOfArgument": "",
|
|
233
|
+
"allowPadding": false,
|
|
234
|
+
"name": "DocIdColumn",
|
|
235
|
+
"alternateNames": [],
|
|
236
|
+
"isRequired": false,
|
|
237
|
+
"rDescription": "specifies the name of the column that uniquely identifies a row in the input table.",
|
|
238
|
+
"description": "specifies the name of the column that uniquely identifies a row in the input table.",
|
|
239
|
+
"datatype": "COLUMNS",
|
|
240
|
+
"allowsLists": false,
|
|
241
|
+
"rName": "doc.id.column",
|
|
242
|
+
"useInR": true,
|
|
243
|
+
"rOrderNum": 13
|
|
244
|
+
},
|
|
245
|
+
{
|
|
246
|
+
"defaultValue": false,
|
|
247
|
+
"permittedValues": [],
|
|
248
|
+
"isOutputColumn": false,
|
|
249
|
+
"matchLengthOfArgument": "",
|
|
250
|
+
"allowPadding": false,
|
|
251
|
+
"name": "ListPositions",
|
|
252
|
+
"alternateNames": [],
|
|
253
|
+
"isRequired": false,
|
|
254
|
+
"rDescription": "Specifies whether to output the positions of a word in list form.",
|
|
255
|
+
"description": "Specifies whether to output the positions of a word in list form.",
|
|
256
|
+
"datatype": "BOOLEAN",
|
|
257
|
+
"allowsLists": false,
|
|
258
|
+
"rName": "list.positions",
|
|
259
|
+
"useInR": true,
|
|
260
|
+
"rOrderNum": 14
|
|
261
|
+
},
|
|
262
|
+
{
|
|
263
|
+
"defaultValue": false,
|
|
264
|
+
"permittedValues": [],
|
|
265
|
+
"isOutputColumn": false,
|
|
266
|
+
"matchLengthOfArgument": "",
|
|
267
|
+
"allowPadding": false,
|
|
268
|
+
"name": "TokenFrequency",
|
|
269
|
+
"alternateNames": [],
|
|
270
|
+
"isRequired": false,
|
|
271
|
+
"rDescription": "Specifies whether to output the frequency for each token.",
|
|
272
|
+
"description": "Specifies whether to output the frequency for each token.",
|
|
273
|
+
"datatype": "BOOLEAN",
|
|
274
|
+
"allowsLists": false,
|
|
275
|
+
"rName": "token.frequency",
|
|
276
|
+
"useInR": true,
|
|
277
|
+
"rOrderNum": 15
|
|
278
|
+
},
|
|
279
|
+
{
|
|
280
|
+
"defaultValue": true,
|
|
281
|
+
"permittedValues": [],
|
|
282
|
+
"isOutputColumn": false,
|
|
283
|
+
"matchLengthOfArgument": "",
|
|
284
|
+
"allowPadding": false,
|
|
285
|
+
"name": "OutputByWord",
|
|
286
|
+
"alternateNames": [],
|
|
287
|
+
"isRequired": false,
|
|
288
|
+
"rDescription": "Specifies whether to output each token in a separate row or all tokens in one.",
|
|
289
|
+
"description": "Specifies whether to output each token in a separate row or all tokens in one.",
|
|
290
|
+
"datatype": "BOOLEAN",
|
|
291
|
+
"allowsLists": false,
|
|
292
|
+
"rName": "output.by.word",
|
|
293
|
+
"useInR": true,
|
|
294
|
+
"rOrderNum": 16
|
|
295
|
+
}
|
|
191
296
|
]
|
|
192
|
-
}
|
|
297
|
+
}
|