teradataml 20.0.0.3__py3-none-any.whl → 20.0.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +193 -1
- teradataml/__init__.py +2 -1
- teradataml/_version.py +2 -2
- teradataml/analytics/analytic_function_executor.py +25 -18
- teradataml/analytics/byom/__init__.py +1 -1
- teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
- teradataml/analytics/sqle/__init__.py +20 -2
- teradataml/analytics/utils.py +15 -1
- teradataml/analytics/valib.py +18 -4
- teradataml/automl/__init__.py +341 -112
- teradataml/automl/autodataprep/__init__.py +471 -0
- teradataml/automl/data_preparation.py +84 -42
- teradataml/automl/data_transformation.py +69 -33
- teradataml/automl/feature_engineering.py +76 -9
- teradataml/automl/feature_exploration.py +639 -25
- teradataml/automl/model_training.py +35 -14
- teradataml/clients/auth_client.py +2 -2
- teradataml/common/__init__.py +1 -2
- teradataml/common/constants.py +122 -63
- teradataml/common/messagecodes.py +14 -3
- teradataml/common/messages.py +8 -4
- teradataml/common/sqlbundle.py +40 -10
- teradataml/common/utils.py +366 -74
- teradataml/common/warnings.py +11 -0
- teradataml/context/context.py +348 -86
- teradataml/data/amazon_reviews_25.csv +26 -0
- teradataml/data/apriori_example.json +22 -0
- teradataml/data/byom_example.json +11 -0
- teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
- teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
- teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
- teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
- teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
- teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
- teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
- teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
- teradataml/data/docs/sqle/docs_17_20/Shap.py +28 -6
- teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
- teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
- teradataml/data/hnsw_alter_data.csv +5 -0
- teradataml/data/hnsw_data.csv +10 -0
- teradataml/data/jsons/byom/h2opredict.json +1 -1
- teradataml/data/jsons/byom/onnxembeddings.json +266 -0
- teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
- teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
- teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
- teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
- teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
- teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
- teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
- teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
- teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +2 -2
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +3 -3
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +6 -6
- teradataml/data/ner_dict.csv +8 -0
- teradataml/data/ner_input_eng.csv +7 -0
- teradataml/data/ner_rule.csv +5 -0
- teradataml/data/pos_input.csv +40 -0
- teradataml/data/tdnerextractor_example.json +14 -0
- teradataml/data/teradataml_example.json +21 -0
- teradataml/data/textmorph_example.json +5 -0
- teradataml/data/to_num_data.csv +4 -0
- teradataml/data/tochar_data.csv +5 -0
- teradataml/data/trans_dense.csv +16 -0
- teradataml/data/trans_sparse.csv +55 -0
- teradataml/data/vectordistance_example.json +1 -1
- teradataml/dataframe/copy_to.py +45 -29
- teradataml/dataframe/data_transfer.py +72 -46
- teradataml/dataframe/dataframe.py +642 -166
- teradataml/dataframe/dataframe_utils.py +167 -22
- teradataml/dataframe/functions.py +135 -20
- teradataml/dataframe/setop.py +11 -6
- teradataml/dataframe/sql.py +330 -78
- teradataml/dbutils/dbutils.py +556 -140
- teradataml/dbutils/filemgr.py +14 -10
- teradataml/hyperparameter_tuner/optimizer.py +12 -1
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/{sklearn/_sklearn_wrapper.py → _base.py} +168 -1013
- teradataml/opensource/_class.py +141 -17
- teradataml/opensource/{constants.py → _constants.py} +7 -3
- teradataml/opensource/_lightgbm.py +52 -53
- teradataml/opensource/_sklearn.py +1008 -0
- teradataml/opensource/_wrapper_utils.py +5 -5
- teradataml/options/__init__.py +47 -15
- teradataml/options/configure.py +103 -26
- teradataml/options/display.py +13 -2
- teradataml/plot/axis.py +47 -8
- teradataml/plot/figure.py +33 -0
- teradataml/plot/plot.py +63 -13
- teradataml/scriptmgmt/UserEnv.py +307 -40
- teradataml/scriptmgmt/lls_utils.py +428 -145
- teradataml/store/__init__.py +2 -3
- teradataml/store/feature_store/feature_store.py +102 -7
- teradataml/table_operators/Apply.py +48 -19
- teradataml/table_operators/Script.py +23 -2
- teradataml/table_operators/TableOperator.py +3 -1
- teradataml/table_operators/table_operator_util.py +58 -9
- teradataml/utils/dtypes.py +49 -1
- teradataml/utils/internal_buffer.py +38 -0
- teradataml/utils/validators.py +377 -62
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/METADATA +200 -4
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/RECORD +146 -112
- teradataml/data/SQL_Fundamentals.pdf +0 -0
- teradataml/libaed_0_1.dylib +0 -0
- teradataml/libaed_0_1.so +0 -0
- teradataml/opensource/sklearn/__init__.py +0 -0
- teradataml/store/vector_store/__init__.py +0 -1586
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/zip-safe +0 -0
|
@@ -0,0 +1,266 @@
|
|
|
1
|
+
{
|
|
2
|
+
"json_schema_major_version": "1",
|
|
3
|
+
"json_schema_minor_version": "2",
|
|
4
|
+
"json_content_version": "1",
|
|
5
|
+
"function_name": "ONNXEmbeddings",
|
|
6
|
+
"function_version": "1.0",
|
|
7
|
+
"function_type": "byom",
|
|
8
|
+
"function_r_name": "aa.onnx.embeddings",
|
|
9
|
+
"function_alias_name": "ONNXEmbeddings",
|
|
10
|
+
"short_description": "This function generates embedding values using an ONNX model in Vantage",
|
|
11
|
+
"long_description": "This function is used to calculate embedding values in Vantage with a HuggingFace model that has been created outside Vantage and exported to Vantage using the ONNX format",
|
|
12
|
+
"input_tables": [
|
|
13
|
+
{
|
|
14
|
+
"requiredInputKind": [
|
|
15
|
+
"PartitionByAny",
|
|
16
|
+
"PartitionByKey"
|
|
17
|
+
],
|
|
18
|
+
"isOrdered": false,
|
|
19
|
+
"partitionByOne": false,
|
|
20
|
+
"partitionByOneInclusive": false,
|
|
21
|
+
"name": "InputTable",
|
|
22
|
+
"alternateNames": [],
|
|
23
|
+
"isRequired": true,
|
|
24
|
+
"rDescription": "The input table that contains the text from which we generate embedding values ",
|
|
25
|
+
"description": "The input table that contains the text from which we generate embedding values ",
|
|
26
|
+
"datatype": "TABLE_ALIAS",
|
|
27
|
+
"allowsLists": false,
|
|
28
|
+
"rName": "newdata",
|
|
29
|
+
"useInR": true,
|
|
30
|
+
"rOrderNum": 1
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"requiredInputKind": [
|
|
34
|
+
"Dimension"
|
|
35
|
+
],
|
|
36
|
+
"isOrdered": false,
|
|
37
|
+
"partitionByOne": false,
|
|
38
|
+
"partitionByOneInclusive": false,
|
|
39
|
+
"name": "ModelTable",
|
|
40
|
+
"alternateNames": [],
|
|
41
|
+
"isRequired": true,
|
|
42
|
+
"rDescription": "The model table to be used for calculating embedding values ",
|
|
43
|
+
"description": "The model table to be used for calculating embedding values ",
|
|
44
|
+
"datatype": "TABLE_ALIAS",
|
|
45
|
+
"allowsLists": false,
|
|
46
|
+
"rName": "modeldata",
|
|
47
|
+
"useInR": true,
|
|
48
|
+
"rOrderNum": 2
|
|
49
|
+
},
|
|
50
|
+
{
|
|
51
|
+
"requiredInputKind": [
|
|
52
|
+
"Dimension"
|
|
53
|
+
],
|
|
54
|
+
"isOrdered": false,
|
|
55
|
+
"partitionByOne": false,
|
|
56
|
+
"partitionByOneInclusive": false,
|
|
57
|
+
"name": "TokenizerTable",
|
|
58
|
+
"alternateNames": [],
|
|
59
|
+
"isRequired": true,
|
|
60
|
+
"rDescription": "The tokenizer table which contains the tokenizer json file ",
|
|
61
|
+
"description": "The tokenizer table which contains the tokenizer json file ",
|
|
62
|
+
"datatype": "TABLE_ALIAS",
|
|
63
|
+
"allowsLists": false,
|
|
64
|
+
"rName": "tokenizerdata",
|
|
65
|
+
"useInR": true,
|
|
66
|
+
"rOrderNum": 3
|
|
67
|
+
}
|
|
68
|
+
],
|
|
69
|
+
"argument_clauses": [
|
|
70
|
+
{
|
|
71
|
+
"targetTable": [
|
|
72
|
+
"InputTable"
|
|
73
|
+
],
|
|
74
|
+
"checkDuplicate": true,
|
|
75
|
+
"allowedTypes": [],
|
|
76
|
+
"allowedTypeGroups": [
|
|
77
|
+
"ALL"
|
|
78
|
+
],
|
|
79
|
+
"matchLengthOfArgument": "",
|
|
80
|
+
"allowPadding": true,
|
|
81
|
+
"name": "Accumulate",
|
|
82
|
+
"alternateNames": [],
|
|
83
|
+
"isRequired": true,
|
|
84
|
+
"rDescription": "Specifies the names of input_table columns to copy to the output table.",
|
|
85
|
+
"description": "Specify the names of the input columns to copy to the output table. ",
|
|
86
|
+
"datatype": "COLUMNS",
|
|
87
|
+
"allowsLists": true,
|
|
88
|
+
"rName": "accumulate",
|
|
89
|
+
"useInR": true,
|
|
90
|
+
"rOrderNum": 4
|
|
91
|
+
},
|
|
92
|
+
{
|
|
93
|
+
"checkDuplicate": true,
|
|
94
|
+
"allowedTypes": [],
|
|
95
|
+
"allowedTypeGroups": [
|
|
96
|
+
"ALL"
|
|
97
|
+
],
|
|
98
|
+
"matchLengthOfArgument": "",
|
|
99
|
+
"allowPadding": true,
|
|
100
|
+
"name": "ModelOutputTensor",
|
|
101
|
+
"alternateNames": [],
|
|
102
|
+
"isRequired": true,
|
|
103
|
+
"rDescription": "Specifies the column of the model's possible output fields that the user wants to calculate and output ",
|
|
104
|
+
"description": "Specifies the column of the model's possible output fields that the user wants to calculate and output ",
|
|
105
|
+
"datatype": "STRING",
|
|
106
|
+
"allowsLists": false,
|
|
107
|
+
"rName": "model.output.tensor",
|
|
108
|
+
"useInR": true,
|
|
109
|
+
"rOrderNum": 5
|
|
110
|
+
},
|
|
111
|
+
{
|
|
112
|
+
"defaultValue": 512,
|
|
113
|
+
"checkDuplicate": true,
|
|
114
|
+
"allowedTypes": [],
|
|
115
|
+
"allowedTypeGroups": [
|
|
116
|
+
"ALL"
|
|
117
|
+
],
|
|
118
|
+
"matchLengthOfArgument": "",
|
|
119
|
+
"allowPadding": true,
|
|
120
|
+
"name": "EncodeMaxLength",
|
|
121
|
+
"alternateNames": [],
|
|
122
|
+
"isRequired": false,
|
|
123
|
+
"rDescription": "Specifies the maximum length of the tokenizer output token encodings(only applies for models with symbolic dimensions) ",
|
|
124
|
+
"description": "Specifies the maximum length of the tokenizer output token encodings(only applies for models with symbolic dimensions) ",
|
|
125
|
+
"datatype": "INTEGER",
|
|
126
|
+
"allowsLists": false,
|
|
127
|
+
"rName": "encode.max.length",
|
|
128
|
+
"useInR": true,
|
|
129
|
+
"rOrderNum": 6
|
|
130
|
+
},
|
|
131
|
+
{
|
|
132
|
+
"defaultValue": false,
|
|
133
|
+
"checkDuplicate": true,
|
|
134
|
+
"allowedTypes": [],
|
|
135
|
+
"allowedTypeGroups": [
|
|
136
|
+
"ALL"
|
|
137
|
+
],
|
|
138
|
+
"matchLengthOfArgument": "",
|
|
139
|
+
"allowPadding": true,
|
|
140
|
+
"name": "ShowModelProperties",
|
|
141
|
+
"alternateNames": [],
|
|
142
|
+
"isRequired": false,
|
|
143
|
+
"rDescription": " Show default or expanded ModelInputFieldsMap based on input model for defaults or ModelInputFieldsMap for expansion. ",
|
|
144
|
+
"description": " Show default or expanded ModelInputFieldsMap based on input model for defaults or ModelInputFieldsMap for expansion. ",
|
|
145
|
+
"datatype": "BOOLEAN",
|
|
146
|
+
"allowsLists": false,
|
|
147
|
+
"rName": "show.model.properties",
|
|
148
|
+
"useInR": true,
|
|
149
|
+
"rOrderNum": 7
|
|
150
|
+
},
|
|
151
|
+
{
|
|
152
|
+
"defaultValue": "emb_",
|
|
153
|
+
"checkDuplicate": true,
|
|
154
|
+
"allowedTypes": [],
|
|
155
|
+
"allowedTypeGroups": [
|
|
156
|
+
"ALL"
|
|
157
|
+
],
|
|
158
|
+
"matchLengthOfArgument": "",
|
|
159
|
+
"allowPadding": true,
|
|
160
|
+
"name": "OutputColumnPrefix",
|
|
161
|
+
"alternateNames": [],
|
|
162
|
+
"isRequired": false,
|
|
163
|
+
"rDescription": "Specifies the column prefix for each of the output columns when using FLOAT32 OutputFormat ",
|
|
164
|
+
"description": "Specifies the column prefix for each of the output columns when using FLOAT32 OutputFormat ",
|
|
165
|
+
"datatype": "STRING",
|
|
166
|
+
"allowsLists": false,
|
|
167
|
+
"rName": "output.column.prefix",
|
|
168
|
+
"useInR": true,
|
|
169
|
+
"rOrderNum": 8
|
|
170
|
+
},
|
|
171
|
+
{
|
|
172
|
+
"defaultValue": "VARBYTE(3072)",
|
|
173
|
+
"checkDuplicate": true,
|
|
174
|
+
"allowedTypes": [],
|
|
175
|
+
"allowedTypeGroups": [
|
|
176
|
+
"ALL"
|
|
177
|
+
],
|
|
178
|
+
"matchLengthOfArgument": "",
|
|
179
|
+
"allowPadding": true,
|
|
180
|
+
"name": "OutputFormat",
|
|
181
|
+
"alternateNames": [],
|
|
182
|
+
"isRequired": false,
|
|
183
|
+
"rDescription": "Specifies the output format for the model embeddings output ",
|
|
184
|
+
"description": "Specifies the output format for the model embeddings output ",
|
|
185
|
+
"datatype": "STRING",
|
|
186
|
+
"allowsLists": false,
|
|
187
|
+
"rName": "output.format",
|
|
188
|
+
"useInR": true,
|
|
189
|
+
"rOrderNum": 9
|
|
190
|
+
},
|
|
191
|
+
{
|
|
192
|
+
"permittedValues": [
|
|
193
|
+
"true",
|
|
194
|
+
"t",
|
|
195
|
+
"yes",
|
|
196
|
+
"y",
|
|
197
|
+
"1",
|
|
198
|
+
"false",
|
|
199
|
+
"f",
|
|
200
|
+
"no",
|
|
201
|
+
"n",
|
|
202
|
+
"0",
|
|
203
|
+
"*",
|
|
204
|
+
"current_cached_model"
|
|
205
|
+
],
|
|
206
|
+
"defaultValue": "false",
|
|
207
|
+
"checkDuplicate": true,
|
|
208
|
+
"allowedTypes": [],
|
|
209
|
+
"allowedTypeGroups": [
|
|
210
|
+
"ALL"
|
|
211
|
+
],
|
|
212
|
+
"matchLengthOfArgument": "",
|
|
213
|
+
"allowPadding": true,
|
|
214
|
+
"name": "OverwriteCachedModel",
|
|
215
|
+
"alternateNames": [],
|
|
216
|
+
"isRequired": false,
|
|
217
|
+
"rDescription": "Specifies the model name that needs to be removed from the cache. * can also be used to remove the models ",
|
|
218
|
+
"description": " Specifies the model name that needs to be removed from the cache. * can also be used to remove the models ",
|
|
219
|
+
"datatype": "STRING",
|
|
220
|
+
"allowsLists": false,
|
|
221
|
+
"rName": "overwrite.cached.models",
|
|
222
|
+
"useInR": true,
|
|
223
|
+
"rOrderNum": 10
|
|
224
|
+
},
|
|
225
|
+
{
|
|
226
|
+
"defaultValue": false,
|
|
227
|
+
"checkDuplicate": true,
|
|
228
|
+
"allowedTypes": [],
|
|
229
|
+
"allowedTypeGroups": [
|
|
230
|
+
"ALL"
|
|
231
|
+
],
|
|
232
|
+
"matchLengthOfArgument": "",
|
|
233
|
+
"allowPadding": true,
|
|
234
|
+
"name": "IsDebug",
|
|
235
|
+
"alternateNames": [],
|
|
236
|
+
"isRequired": false,
|
|
237
|
+
"rDescription": "Print additional information in trace table regarding execution of ONNXEmbeddings ",
|
|
238
|
+
"description": " Print additional information in trace table regarding execution of ONNXEmbeddings ",
|
|
239
|
+
"datatype": "BOOLEAN",
|
|
240
|
+
"allowsLists": false,
|
|
241
|
+
"rName": "is.debug",
|
|
242
|
+
"useInR": true,
|
|
243
|
+
"rOrderNum": 11
|
|
244
|
+
},
|
|
245
|
+
{
|
|
246
|
+
"defaultValue": true,
|
|
247
|
+
"checkDuplicate": true,
|
|
248
|
+
"allowedTypes": [],
|
|
249
|
+
"allowedTypeGroups": [
|
|
250
|
+
"ALL"
|
|
251
|
+
],
|
|
252
|
+
"matchLengthOfArgument": "",
|
|
253
|
+
"allowPadding": true,
|
|
254
|
+
"name": "EnableMemoryCheck",
|
|
255
|
+
"alternateNames": [],
|
|
256
|
+
"isRequired": false,
|
|
257
|
+
"rDescription": "If true, verifies if there is enough native memory for large models ",
|
|
258
|
+
"description": "If true, verifies if there is enough native memory for large models ",
|
|
259
|
+
"datatype": "BOOLEAN",
|
|
260
|
+
"allowsLists": false,
|
|
261
|
+
"rName": "enable.memory.check",
|
|
262
|
+
"useInR": true,
|
|
263
|
+
"rOrderNum": 12
|
|
264
|
+
}
|
|
265
|
+
]
|
|
266
|
+
}
|
|
@@ -124,8 +124,8 @@
|
|
|
124
124
|
"name": "Delimiter",
|
|
125
125
|
"alternateNames": [],
|
|
126
126
|
"isRequired": false,
|
|
127
|
-
"rDescription": "A character or string that separates words in the input text. The default value is the set of all whitespace characters which includes the characters for space, tab, newline, carriage return and some others.",
|
|
128
|
-
"description": "A character or string that separates words in the input text. The default value is the set of all whitespace characters which includes the characters for space, tab, newline, carriage return and some others.",
|
|
127
|
+
"rDescription": "A character or string or a regular expression that separates words in the input text. The default value is the set of all whitespace characters which includes the characters for space, tab, newline, carriage return and some others.",
|
|
128
|
+
"description": "A character or string or a regular expression that separates words in the input text. The default value is the set of all whitespace characters which includes the characters for space, tab, newline, carriage return and some others.",
|
|
129
129
|
"datatype": "STRING",
|
|
130
130
|
"allowsLists": false,
|
|
131
131
|
"rName": "delimiter",
|
|
@@ -139,8 +139,8 @@
|
|
|
139
139
|
"name": "Punctuation",
|
|
140
140
|
"alternateNames": [],
|
|
141
141
|
"isRequired": false,
|
|
142
|
-
"rDescription": "A string that specifies the punctuation characters for the function to remove before evaluating the input text. The default characters to remove are: `~#^&*()-",
|
|
143
|
-
"description": "A string that specifies the punctuation characters for the function to remove before evaluating the input text. The default characters to remove are: `~#^&*()-",
|
|
142
|
+
"rDescription": "A string or a regular expression that specifies the punctuation characters for the function to remove before evaluating the input text. The default characters to remove are: `~#^&*()-",
|
|
143
|
+
"description": "A string or a regular expression that specifies the punctuation characters for the function to remove before evaluating the input text. The default characters to remove are: `~#^&*()-",
|
|
144
144
|
"datatype": "STRING",
|
|
145
145
|
"allowsLists": false,
|
|
146
146
|
"rName": "punctuation",
|
|
@@ -154,8 +154,8 @@
|
|
|
154
154
|
"name": "Reset",
|
|
155
155
|
"alternateNames": [],
|
|
156
156
|
"isRequired": false,
|
|
157
|
-
"rDescription": "A string that specifies the character or string that ends a sentence. The default sentence-ending characters are: .,?! At the end of a sentence, the function discards any partial n-grams and searches for the next n-gram at the beginning of the next sentence. An n-gram cannot span two sentences.",
|
|
158
|
-
"description": "A string expression that specifies the character or string that ends a sentence. The default sentence-ending characters are: .,?! At the end of a sentence, the function discards any partial n-grams and searches for the next n-gram at the beginning of the next sentence. An n-gram cannot span two sentences.",
|
|
157
|
+
"rDescription": "A string or a regular expression that specifies the character or string that ends a sentence. The default sentence-ending characters are: .,?! At the end of a sentence, the function discards any partial n-grams and searches for the next n-gram at the beginning of the next sentence. An n-gram cannot span two sentences.",
|
|
158
|
+
"description": "A string or a regular expression that specifies the character or string that ends a sentence. The default sentence-ending characters are: .,?! At the end of a sentence, the function discards any partial n-grams and searches for the next n-gram at the beginning of the next sentence. An n-gram cannot span two sentences.",
|
|
159
159
|
"datatype": "STRING",
|
|
160
160
|
"allowsLists": false,
|
|
161
161
|
"rName": "reset",
|
|
@@ -0,0 +1,181 @@
|
|
|
1
|
+
{
|
|
2
|
+
"json_schema_major_version": "1",
|
|
3
|
+
"json_schema_minor_version": "1",
|
|
4
|
+
"json_content_version": "1",
|
|
5
|
+
"function_name": "TD_Apriori",
|
|
6
|
+
"function_version": "1.0",
|
|
7
|
+
"function_alias_name": "TD_Apriori",
|
|
8
|
+
"function_type": "fastpath",
|
|
9
|
+
"function_category": "Association Analysis",
|
|
10
|
+
"function_r_name": "aa.td.apriori",
|
|
11
|
+
"short_description": "This function finds the patterns and calculates different statistical metrics to understand the influence of the occurrence of a set of items on others.",
|
|
12
|
+
"long_description": "This function finds the patterns and calculates different statistical metrics to understand the influence of the occurrence of a set of items on others.",
|
|
13
|
+
"input_tables": [
|
|
14
|
+
{
|
|
15
|
+
"requiredInputKind": [
|
|
16
|
+
"PartitionByAny"
|
|
17
|
+
],
|
|
18
|
+
"isOrdered": false,
|
|
19
|
+
"partitionByOne": false,
|
|
20
|
+
"name": "InputTable",
|
|
21
|
+
"alternateNames": [],
|
|
22
|
+
"isRequired": true,
|
|
23
|
+
"rDescription": "Specifies the table containing the input data.",
|
|
24
|
+
"description": "Specifies the table containing the input data.",
|
|
25
|
+
"datatype": "TABLE_ALIAS",
|
|
26
|
+
"allowsLists": false,
|
|
27
|
+
"rName": "data",
|
|
28
|
+
"useInR": true,
|
|
29
|
+
"rOrderNum": 1
|
|
30
|
+
}
|
|
31
|
+
],
|
|
32
|
+
"argument_clauses": [
|
|
33
|
+
{
|
|
34
|
+
"targetTable": [
|
|
35
|
+
"InputTable"
|
|
36
|
+
],
|
|
37
|
+
"checkDuplicate": true,
|
|
38
|
+
"allowedTypes": [],
|
|
39
|
+
"allowedTypeGroups": [
|
|
40
|
+
"STRING"
|
|
41
|
+
],
|
|
42
|
+
"matchLengthOfArgument": "",
|
|
43
|
+
"allowPadding": false,
|
|
44
|
+
"name": "TargetColumn",
|
|
45
|
+
"alternateNames": [],
|
|
46
|
+
"isRequired": true,
|
|
47
|
+
"rDescription": "Specify the column from the input table which contains the data.",
|
|
48
|
+
"description": "Specify the column from the input table which contains the data.",
|
|
49
|
+
"datatype": "COLUMNS",
|
|
50
|
+
"allowsLists": false,
|
|
51
|
+
"rName": "target.column",
|
|
52
|
+
"useInR": true,
|
|
53
|
+
"rOrderNum": 2
|
|
54
|
+
},
|
|
55
|
+
{
|
|
56
|
+
"targetTable": [
|
|
57
|
+
"InputTable"
|
|
58
|
+
],
|
|
59
|
+
"checkDuplicate": true,
|
|
60
|
+
"allowedTypes": [],
|
|
61
|
+
"allowedTypeGroups": [
|
|
62
|
+
"ALL"
|
|
63
|
+
],
|
|
64
|
+
"matchLengthOfArgument": "",
|
|
65
|
+
"allowPadding": false,
|
|
66
|
+
"name": "IDColumn",
|
|
67
|
+
"alternateNames": [],
|
|
68
|
+
"isRequired": false,
|
|
69
|
+
"rDescription": "Specifies the ID column to identify a datapoint(set of rows belong to same data) in a sparse input. Applicable only when isDenseInput is False",
|
|
70
|
+
"description": "Specifies the ID column to identify a datapoint(set of rows belong to same data) in a sparse input. Applicable only when isDenseInput is False",
|
|
71
|
+
"datatype": "COLUMNS",
|
|
72
|
+
"allowsLists": true,
|
|
73
|
+
"rName": "id.column",
|
|
74
|
+
"useInR": true,
|
|
75
|
+
"rOrderNum": 3
|
|
76
|
+
},
|
|
77
|
+
{
|
|
78
|
+
"targetTable": [
|
|
79
|
+
"InputTable"
|
|
80
|
+
],
|
|
81
|
+
"checkDuplicate": true,
|
|
82
|
+
"allowedTypes": [],
|
|
83
|
+
"allowedTypeGroups": [
|
|
84
|
+
"ALL"
|
|
85
|
+
],
|
|
86
|
+
"matchLengthOfArgument": "",
|
|
87
|
+
"allowPadding": false,
|
|
88
|
+
"name": "PartitionColumns",
|
|
89
|
+
"alternateNames": [],
|
|
90
|
+
"isRequired": false,
|
|
91
|
+
"rDescription": "Specify the name of the input table columns on which to partition the input.",
|
|
92
|
+
"description": "Specify the name of the input table columns on which to partition the input.",
|
|
93
|
+
"datatype": "COLUMNS",
|
|
94
|
+
"allowsLists": true,
|
|
95
|
+
"rName": "partition.columns",
|
|
96
|
+
"useInR": true,
|
|
97
|
+
"rOrderNum": 4
|
|
98
|
+
},
|
|
99
|
+
{
|
|
100
|
+
"defaultValue": 2,
|
|
101
|
+
"lowerBound": 1,
|
|
102
|
+
"upperBound": 20,
|
|
103
|
+
"lowerBoundType": "INCLUSIVE",
|
|
104
|
+
"upperBoundType": "INCLUSIVE",
|
|
105
|
+
"allowNaN": false,
|
|
106
|
+
"name": "MaxLen",
|
|
107
|
+
"alternateNames": [],
|
|
108
|
+
"isRequired": false,
|
|
109
|
+
"rDescription": "Specifies the number of items in item set. The default value is 2.",
|
|
110
|
+
"description": "Specifies the number of items in item set. The default value is 2.",
|
|
111
|
+
"datatype": "INTEGER",
|
|
112
|
+
"allowsLists": false,
|
|
113
|
+
"rName": "max.len",
|
|
114
|
+
"useInR": true,
|
|
115
|
+
"rOrderNum": 5
|
|
116
|
+
},
|
|
117
|
+
{
|
|
118
|
+
"permittedValues": [],
|
|
119
|
+
"defaultValue": ",",
|
|
120
|
+
"isOutputColumn": false,
|
|
121
|
+
"name": "Delimiter",
|
|
122
|
+
"alternateNames": [],
|
|
123
|
+
"isRequired": false,
|
|
124
|
+
"rDescription": "A character or string that separates words in the input text. The default value is the comma. Applicable only when isDenseInput is True.",
|
|
125
|
+
"description": "A character or string that separates words in the input text. The default value is the comma. Applicable only when isDenseInput is True.",
|
|
126
|
+
"datatype": "STRING",
|
|
127
|
+
"allowsLists": false,
|
|
128
|
+
"rName": "delimiter",
|
|
129
|
+
"useInR": true,
|
|
130
|
+
"rOrderNum": 6
|
|
131
|
+
},
|
|
132
|
+
{
|
|
133
|
+
"defaultValue": false,
|
|
134
|
+
"name": "isDenseInput",
|
|
135
|
+
"alternateNames": [],
|
|
136
|
+
"isRequired": false,
|
|
137
|
+
"rDescription": "A Boolean value that specifies whether the input to the function is in dense format. Default is false (sparse format).",
|
|
138
|
+
"description": "A Boolean value that specifies whether the input to the function is in dense format. Default is false (sparse format).",
|
|
139
|
+
"datatype": "BOOLEAN",
|
|
140
|
+
"allowsLists": false,
|
|
141
|
+
"rName": "is.dense.input",
|
|
142
|
+
"useInR": true,
|
|
143
|
+
"rOrderNum": 7
|
|
144
|
+
},
|
|
145
|
+
{
|
|
146
|
+
"permittedValues": [
|
|
147
|
+
"PATTERNS",
|
|
148
|
+
"RULES"
|
|
149
|
+
],
|
|
150
|
+
"isOutputColumn": false,
|
|
151
|
+
"name": "PatternsOrRules",
|
|
152
|
+
"alternateNames": [],
|
|
153
|
+
"isRequired": false,
|
|
154
|
+
"rDescription": "Specify whether to emit PATTERNS or RULES as output.",
|
|
155
|
+
"description": "Specify whether to emit PATTERNS or RULES as output.",
|
|
156
|
+
"datatype": "STRING",
|
|
157
|
+
"allowsLists": false,
|
|
158
|
+
"rName": "patterns.or.rules",
|
|
159
|
+
"useInR": true,
|
|
160
|
+
"rOrderNum": 8
|
|
161
|
+
},
|
|
162
|
+
{
|
|
163
|
+
"defaultValue": 0.01,
|
|
164
|
+
"lowerBound": 0,
|
|
165
|
+
"upperBound": 1,
|
|
166
|
+
"lowerBoundType": "EXCLUSIVE",
|
|
167
|
+
"upperBoundType": "INCLUSIVE",
|
|
168
|
+
"allowNaN": false,
|
|
169
|
+
"name": "Support",
|
|
170
|
+
"alternateNames": [],
|
|
171
|
+
"isRequired": false,
|
|
172
|
+
"rDescription": "Specify the support value (minimum occurrence threshold) of an itemset.",
|
|
173
|
+
"description": "Specify the support value (minimum occurrence threshold) of an itemset.",
|
|
174
|
+
"datatype": "DOUBLE",
|
|
175
|
+
"allowsLists": false,
|
|
176
|
+
"rName": "support",
|
|
177
|
+
"useInR": true,
|
|
178
|
+
"rOrderNum": 9
|
|
179
|
+
}
|
|
180
|
+
]
|
|
181
|
+
}
|
|
@@ -0,0 +1,145 @@
|
|
|
1
|
+
{
|
|
2
|
+
"json_schema_major_version": "1",
|
|
3
|
+
"json_schema_minor_version": "1",
|
|
4
|
+
"json_content_version": "1",
|
|
5
|
+
"function_name": "TD_NERExtractor",
|
|
6
|
+
"function_version": "1.0",
|
|
7
|
+
"function_type": "fastpath",
|
|
8
|
+
"function_category": "Text Analysis",
|
|
9
|
+
"function_alias_name": "TD_NERExtractor",
|
|
10
|
+
"function_r_name": "aa.td_nerextractor",
|
|
11
|
+
"ref_function_r_name": "aa.td_nerextractor",
|
|
12
|
+
"short_description": "Perform Named Entity Recognition (NER) on input text according to user-defined dictionary words or regular expression (regex) patterns.",
|
|
13
|
+
"long_description": "Perform Named Entity Recognition (NER) on input text according to user-defined dictionary words or regular expression (regex) patterns.",
|
|
14
|
+
"input_tables": [
|
|
15
|
+
{
|
|
16
|
+
"requiredInputKind": [
|
|
17
|
+
"PartitionByAny"
|
|
18
|
+
],
|
|
19
|
+
"isOrdered": false,
|
|
20
|
+
"partitionByOne": false,
|
|
21
|
+
"name": "inputtable",
|
|
22
|
+
"alternateNames": [],
|
|
23
|
+
"isRequired": true,
|
|
24
|
+
"rDescription": "Specifies the table containing the input text data.",
|
|
25
|
+
"description": "Specifies the table containing the input text data.",
|
|
26
|
+
"datatype": "TABLE_ALIAS",
|
|
27
|
+
"allowsLists": false,
|
|
28
|
+
"rName": "data",
|
|
29
|
+
"useInR": true,
|
|
30
|
+
"rOrderNum": 1
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"requiredInputKind": [
|
|
34
|
+
"Dimension"
|
|
35
|
+
],
|
|
36
|
+
"isOrdered": false,
|
|
37
|
+
"partitionByOne": false,
|
|
38
|
+
"name": "dict",
|
|
39
|
+
"alternateNames": [],
|
|
40
|
+
"isRequired": true,
|
|
41
|
+
"rDescription": "Specifies the table containing user-defined dictionary words and their entity label.",
|
|
42
|
+
"description": "Specifies the table containing user-defined dictionary words and their entity label.",
|
|
43
|
+
"datatype": "TABLE_ALIAS",
|
|
44
|
+
"allowsLists": false,
|
|
45
|
+
"rName": "user.defined.data",
|
|
46
|
+
"useInR": true,
|
|
47
|
+
"rOrderNum": 2
|
|
48
|
+
},
|
|
49
|
+
{
|
|
50
|
+
"requiredInputKind": [
|
|
51
|
+
"Dimension"
|
|
52
|
+
],
|
|
53
|
+
"isOrdered": false,
|
|
54
|
+
"partitionByOne": false,
|
|
55
|
+
"name": "rules",
|
|
56
|
+
"alternateNames": [],
|
|
57
|
+
"isRequired": true,
|
|
58
|
+
"rDescription": "Specifies the table containing user-defined regex patterns and their entity label.",
|
|
59
|
+
"description": "Specifies the table containing user-defined regex patterns and their entity label.",
|
|
60
|
+
"datatype": "TABLE_ALIAS",
|
|
61
|
+
"allowsLists": false,
|
|
62
|
+
"rName": "rules.data",
|
|
63
|
+
"useInR": true,
|
|
64
|
+
"rOrderNum": 3
|
|
65
|
+
}
|
|
66
|
+
],
|
|
67
|
+
"argument_clauses": [
|
|
68
|
+
{
|
|
69
|
+
"targetTable": [
|
|
70
|
+
"inputtable"
|
|
71
|
+
],
|
|
72
|
+
"matchLengthOfArgument": "",
|
|
73
|
+
"allowPadding": false,
|
|
74
|
+
"name": "TextColumn",
|
|
75
|
+
"alternateNames": [],
|
|
76
|
+
"isRequired": true,
|
|
77
|
+
"rDescription": "Specify the name of the input table column that will be used for NER search.",
|
|
78
|
+
"description": "Specify the name of the input table column that will be used for NER search.",
|
|
79
|
+
"datatype": "COLUMNS",
|
|
80
|
+
"allowsLists": true,
|
|
81
|
+
"rName": "text.column",
|
|
82
|
+
"useInR": true,
|
|
83
|
+
"rFormulaUsage": true,
|
|
84
|
+
"rOrderNum": 4
|
|
85
|
+
},
|
|
86
|
+
{
|
|
87
|
+
"defaultValue": "EN",
|
|
88
|
+
"permittedValues": [
|
|
89
|
+
"EN"
|
|
90
|
+
],
|
|
91
|
+
"isOutputColumn": false,
|
|
92
|
+
"name": "InputLanguage",
|
|
93
|
+
"alternateNames": [],
|
|
94
|
+
"isRequired": false,
|
|
95
|
+
"rDescription": "Specify input language. Acceptable values are EN (English).",
|
|
96
|
+
"description": "Specify input language. Acceptable values are EN (English).",
|
|
97
|
+
"datatype": "STRING",
|
|
98
|
+
"allowsLists": false,
|
|
99
|
+
"rName": "input.language",
|
|
100
|
+
"useInR": true,
|
|
101
|
+
"rOrderNum": 5
|
|
102
|
+
},
|
|
103
|
+
{
|
|
104
|
+
"defaultValue": 0,
|
|
105
|
+
"lowerBound": 1,
|
|
106
|
+
"upperBound": 10,
|
|
107
|
+
"lowerBoundType": "INCLUSIVE",
|
|
108
|
+
"upperBoundType": "INCLUSIVE",
|
|
109
|
+
"allowNaN": false,
|
|
110
|
+
"name": "ShowContext",
|
|
111
|
+
"alternateNames": [],
|
|
112
|
+
"isRequired": false,
|
|
113
|
+
"rDescription": "Specify the number of words before and after the matched entity. If there are fewer leading or trailing words than ShowContext, an ellipsis (...) will be added. Must be a positive value less than 10.",
|
|
114
|
+
"description": "Specify the number of words before and after the matched entity. If there are fewer leading or trailing words than ShowContext, an ellipsis (...) will be added. Must be a positive value less than 10.",
|
|
115
|
+
"datatype": "INTEGER",
|
|
116
|
+
"allowsLists": false,
|
|
117
|
+
"rName": "show.context",
|
|
118
|
+
"useInR": true,
|
|
119
|
+
"rOrderNum": 6
|
|
120
|
+
},
|
|
121
|
+
{
|
|
122
|
+
"targetTable": [
|
|
123
|
+
"inputtable"
|
|
124
|
+
],
|
|
125
|
+
"checkDuplicate": false,
|
|
126
|
+
"allowedTypes": [],
|
|
127
|
+
"allowedTypeGroups": [
|
|
128
|
+
"ALL"
|
|
129
|
+
],
|
|
130
|
+
"matchLengthOfArgument": "",
|
|
131
|
+
"allowPadding": false,
|
|
132
|
+
"name": "Accumulate",
|
|
133
|
+
"alternateNames": [],
|
|
134
|
+
"isRequired": false,
|
|
135
|
+
"rDescription": "Specify the names of the input table columns that need to be copied from the input test table to output.",
|
|
136
|
+
"description": "Specify the names of the input table columns that need to be copied from the input test table to output.",
|
|
137
|
+
"datatype": "COLUMNS",
|
|
138
|
+
"allowsLists": true,
|
|
139
|
+
"rName": "accumulate",
|
|
140
|
+
"useInR": true,
|
|
141
|
+
"rOrderNum": 7
|
|
142
|
+
}
|
|
143
|
+
]
|
|
144
|
+
}
|
|
145
|
+
|