teradataml 20.0.0.3__py3-none-any.whl → 20.0.0.5__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of teradataml might be problematic. Click here for more details.
- teradataml/LICENSE-3RD-PARTY.pdf +0 -0
- teradataml/README.md +193 -1
- teradataml/__init__.py +2 -1
- teradataml/_version.py +2 -2
- teradataml/analytics/analytic_function_executor.py +25 -18
- teradataml/analytics/byom/__init__.py +1 -1
- teradataml/analytics/json_parser/analytic_functions_argument.py +4 -0
- teradataml/analytics/sqle/__init__.py +20 -2
- teradataml/analytics/utils.py +15 -1
- teradataml/analytics/valib.py +18 -4
- teradataml/automl/__init__.py +341 -112
- teradataml/automl/autodataprep/__init__.py +471 -0
- teradataml/automl/data_preparation.py +84 -42
- teradataml/automl/data_transformation.py +69 -33
- teradataml/automl/feature_engineering.py +76 -9
- teradataml/automl/feature_exploration.py +639 -25
- teradataml/automl/model_training.py +35 -14
- teradataml/clients/auth_client.py +2 -2
- teradataml/common/__init__.py +1 -2
- teradataml/common/constants.py +122 -63
- teradataml/common/messagecodes.py +14 -3
- teradataml/common/messages.py +8 -4
- teradataml/common/sqlbundle.py +40 -10
- teradataml/common/utils.py +366 -74
- teradataml/common/warnings.py +11 -0
- teradataml/context/context.py +348 -86
- teradataml/data/amazon_reviews_25.csv +26 -0
- teradataml/data/apriori_example.json +22 -0
- teradataml/data/byom_example.json +11 -0
- teradataml/data/docs/byom/docs/DataRobotPredict.py +2 -2
- teradataml/data/docs/byom/docs/DataikuPredict.py +40 -1
- teradataml/data/docs/byom/docs/H2OPredict.py +2 -2
- teradataml/data/docs/byom/docs/ONNXEmbeddings.py +242 -0
- teradataml/data/docs/byom/docs/ONNXPredict.py +2 -2
- teradataml/data/docs/byom/docs/PMMLPredict.py +2 -2
- teradataml/data/docs/sqle/docs_17_20/Apriori.py +138 -0
- teradataml/data/docs/sqle/docs_17_20/NERExtractor.py +121 -0
- teradataml/data/docs/sqle/docs_17_20/NGramSplitter.py +3 -3
- teradataml/data/docs/sqle/docs_17_20/SMOTE.py +212 -0
- teradataml/data/docs/sqle/docs_17_20/Shap.py +28 -6
- teradataml/data/docs/sqle/docs_17_20/TextMorph.py +119 -0
- teradataml/data/docs/sqle/docs_17_20/TextParser.py +54 -3
- teradataml/data/docs/uaf/docs_17_20/ACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/ArimaEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/ArimaXEstimate.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/DFFT.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFT2Conv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DFFTConv.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/DWT2D.py +4 -1
- teradataml/data/docs/uaf/docs_17_20/FilterFactory1d.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/GenseriesSinusoids.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/GoldfeldQuandt.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/HoltWintersForecaster.py +6 -6
- teradataml/data/docs/uaf/docs_17_20/LineSpec.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/LinearRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Matrix2Image.py +4 -4
- teradataml/data/docs/uaf/docs_17_20/MultivarRegr.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PACF.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/PowerSpec.py +2 -2
- teradataml/data/docs/uaf/docs_17_20/PowerTransform.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/Resample.py +5 -5
- teradataml/data/docs/uaf/docs_17_20/SAX.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/SignifPeriodicities.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/SimpleExp.py +1 -1
- teradataml/data/docs/uaf/docs_17_20/Smoothma.py +3 -3
- teradataml/data/docs/uaf/docs_17_20/UNDIFF.py +1 -1
- teradataml/data/hnsw_alter_data.csv +5 -0
- teradataml/data/hnsw_data.csv +10 -0
- teradataml/data/jsons/byom/h2opredict.json +1 -1
- teradataml/data/jsons/byom/onnxembeddings.json +266 -0
- teradataml/data/jsons/sqle/17.20/NGramSplitter.json +6 -6
- teradataml/data/jsons/sqle/17.20/TD_Apriori.json +181 -0
- teradataml/data/jsons/sqle/17.20/TD_NERExtractor.json +145 -0
- teradataml/data/jsons/sqle/17.20/TD_SMOTE.json +267 -0
- teradataml/data/jsons/sqle/17.20/TD_Shap.json +0 -1
- teradataml/data/jsons/sqle/17.20/TD_TextMorph.json +134 -0
- teradataml/data/jsons/sqle/17.20/TD_TextParser.json +114 -9
- teradataml/data/jsons/sqle/20.00/AI_AnalyzeSentiment.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_AskLLM.json +420 -0
- teradataml/data/jsons/sqle/20.00/AI_DetectLanguage.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_ExtractKeyPhrases.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_MaskPII.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizeEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_RecognizePIIEntities.json +328 -0
- teradataml/data/jsons/sqle/20.00/AI_TextClassifier.json +359 -0
- teradataml/data/jsons/sqle/20.00/AI_TextEmbeddings.json +360 -0
- teradataml/data/jsons/sqle/20.00/AI_TextSummarize.json +343 -0
- teradataml/data/jsons/sqle/20.00/AI_TextTranslate.json +343 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSW.json +296 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWPredict.json +206 -0
- teradataml/data/jsons/sqle/20.00/TD_HNSWSummary.json +32 -0
- teradataml/data/jsons/sqle/20.00/TD_KMeans.json +2 -2
- teradataml/data/jsons/sqle/20.00/TD_SMOTE.json +3 -3
- teradataml/data/jsons/sqle/20.00/TD_VectorDistance.json +6 -6
- teradataml/data/ner_dict.csv +8 -0
- teradataml/data/ner_input_eng.csv +7 -0
- teradataml/data/ner_rule.csv +5 -0
- teradataml/data/pos_input.csv +40 -0
- teradataml/data/tdnerextractor_example.json +14 -0
- teradataml/data/teradataml_example.json +21 -0
- teradataml/data/textmorph_example.json +5 -0
- teradataml/data/to_num_data.csv +4 -0
- teradataml/data/tochar_data.csv +5 -0
- teradataml/data/trans_dense.csv +16 -0
- teradataml/data/trans_sparse.csv +55 -0
- teradataml/data/vectordistance_example.json +1 -1
- teradataml/dataframe/copy_to.py +45 -29
- teradataml/dataframe/data_transfer.py +72 -46
- teradataml/dataframe/dataframe.py +642 -166
- teradataml/dataframe/dataframe_utils.py +167 -22
- teradataml/dataframe/functions.py +135 -20
- teradataml/dataframe/setop.py +11 -6
- teradataml/dataframe/sql.py +330 -78
- teradataml/dbutils/dbutils.py +556 -140
- teradataml/dbutils/filemgr.py +14 -10
- teradataml/hyperparameter_tuner/optimizer.py +12 -1
- teradataml/lib/aed_0_1.dll +0 -0
- teradataml/opensource/{sklearn/_sklearn_wrapper.py → _base.py} +168 -1013
- teradataml/opensource/_class.py +141 -17
- teradataml/opensource/{constants.py → _constants.py} +7 -3
- teradataml/opensource/_lightgbm.py +52 -53
- teradataml/opensource/_sklearn.py +1008 -0
- teradataml/opensource/_wrapper_utils.py +5 -5
- teradataml/options/__init__.py +47 -15
- teradataml/options/configure.py +103 -26
- teradataml/options/display.py +13 -2
- teradataml/plot/axis.py +47 -8
- teradataml/plot/figure.py +33 -0
- teradataml/plot/plot.py +63 -13
- teradataml/scriptmgmt/UserEnv.py +307 -40
- teradataml/scriptmgmt/lls_utils.py +428 -145
- teradataml/store/__init__.py +2 -3
- teradataml/store/feature_store/feature_store.py +102 -7
- teradataml/table_operators/Apply.py +48 -19
- teradataml/table_operators/Script.py +23 -2
- teradataml/table_operators/TableOperator.py +3 -1
- teradataml/table_operators/table_operator_util.py +58 -9
- teradataml/utils/dtypes.py +49 -1
- teradataml/utils/internal_buffer.py +38 -0
- teradataml/utils/validators.py +377 -62
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/METADATA +200 -4
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/RECORD +146 -112
- teradataml/data/SQL_Fundamentals.pdf +0 -0
- teradataml/libaed_0_1.dylib +0 -0
- teradataml/libaed_0_1.so +0 -0
- teradataml/opensource/sklearn/__init__.py +0 -0
- teradataml/store/vector_store/__init__.py +0 -1586
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/WHEEL +0 -0
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/top_level.txt +0 -0
- {teradataml-20.0.0.3.dist-info → teradataml-20.0.0.5.dist-info}/zip-safe +0 -0
|
@@ -0,0 +1,296 @@
|
|
|
1
|
+
{
|
|
2
|
+
"json_schema_major_version": "1",
|
|
3
|
+
"json_schema_minor_version": "1",
|
|
4
|
+
"json_content_version": "1",
|
|
5
|
+
"function_name": "TD_HNSW",
|
|
6
|
+
"function_version": "1.0",
|
|
7
|
+
"function_type": "fastpath",
|
|
8
|
+
"function_category": "Model Training",
|
|
9
|
+
"function_alias_name": "TD_HNSW",
|
|
10
|
+
"function_r_name": "td.hnsw",
|
|
11
|
+
"short_description": "The TD_HNSW function generates an HNSW model from the input data points; the TD_HNSWPredict function then uses this model to determine the approximate nearest neighbors of any given input data point.",
|
|
12
|
+
"long_description": "The TD_HNSW function generates an HNSW model from the input data points; the TD_HNSWPredict function then uses this model to determine the approximate nearest neighbors of any given input data point.",
|
|
13
|
+
"input_tables": [
|
|
14
|
+
{
|
|
15
|
+
"requiredInputKind": [
|
|
16
|
+
"PartitionByAny",
|
|
17
|
+
"Dimension"
|
|
18
|
+
],
|
|
19
|
+
"isOrdered": false,
|
|
20
|
+
"partitionByOne": false,
|
|
21
|
+
"name": "InputTable",
|
|
22
|
+
"alternateNames": [],
|
|
23
|
+
"isRequired": true,
|
|
24
|
+
"rDescription": "The table that contains the input dataset for HNSW model training/update/delete operation.",
|
|
25
|
+
"description": "The table that contains the input dataset for HNSW model training/update/delete operation.",
|
|
26
|
+
"datatype": "TABLE_ALIAS",
|
|
27
|
+
"allowsLists": false,
|
|
28
|
+
"rName": "data",
|
|
29
|
+
"useInR": true,
|
|
30
|
+
"rOrderNum": 1
|
|
31
|
+
},
|
|
32
|
+
{
|
|
33
|
+
"requiredInputKind": [
|
|
34
|
+
"PartitionByAny"
|
|
35
|
+
],
|
|
36
|
+
"isOrdered": false,
|
|
37
|
+
"partitionByOne": false,
|
|
38
|
+
"name": "InputModelTable",
|
|
39
|
+
"alternateNames": [],
|
|
40
|
+
"isRequired": false,
|
|
41
|
+
"rDescription": "The table that contains the HNSW model for update/delete operation.",
|
|
42
|
+
"description": "The table that contains the HNSW model for update/delete operation.",
|
|
43
|
+
"datatype": "TABLE_ALIAS",
|
|
44
|
+
"allowsLists": false,
|
|
45
|
+
"rName": "object",
|
|
46
|
+
"useInR": true,
|
|
47
|
+
"rOrderNum": 2
|
|
48
|
+
}
|
|
49
|
+
],
|
|
50
|
+
"output_tables": [
|
|
51
|
+
{
|
|
52
|
+
"isOutputTable": true,
|
|
53
|
+
"omitPossible": false,
|
|
54
|
+
"name": "ModelTable",
|
|
55
|
+
"alternateNames": [],
|
|
56
|
+
"isRequired": true,
|
|
57
|
+
"rDescription": "Specifies the name of the table in which the generated HNSW model can be stored.",
|
|
58
|
+
"description": "Specifies the name of the table in which the generated HNSW model can be stored.",
|
|
59
|
+
"datatype": "TABLE_NAME",
|
|
60
|
+
"allowsLists": false,
|
|
61
|
+
"rName": "model.table",
|
|
62
|
+
"useInR": true,
|
|
63
|
+
"rOrderNum": 3
|
|
64
|
+
}
|
|
65
|
+
],
|
|
66
|
+
"argument_clauses": [
|
|
67
|
+
{
|
|
68
|
+
"targetTable": [
|
|
69
|
+
"InputTable"
|
|
70
|
+
],
|
|
71
|
+
"checkDuplicate": true,
|
|
72
|
+
"allowedTypes": [],
|
|
73
|
+
"allowedTypeGroups": [
|
|
74
|
+
"INTEGER"
|
|
75
|
+
],
|
|
76
|
+
"requiredLength": 1,
|
|
77
|
+
"matchLengthOfArgument": "",
|
|
78
|
+
"allowPadding": false,
|
|
79
|
+
"name": "IdColumn",
|
|
80
|
+
"alternateNames": [],
|
|
81
|
+
"isRequired": true,
|
|
82
|
+
"rDescription": "Specify the column name containing unique identifier of input rows.",
|
|
83
|
+
"description": "Specify the column name containing unique identifier of input rows.",
|
|
84
|
+
"datatype": "COLUMN_NAMES",
|
|
85
|
+
"allowsLists": false,
|
|
86
|
+
"rName": "id.column",
|
|
87
|
+
"useInR": true,
|
|
88
|
+
"rOrderNum": 4
|
|
89
|
+
},
|
|
90
|
+
{
|
|
91
|
+
"targetTable": [
|
|
92
|
+
"InputTable"
|
|
93
|
+
],
|
|
94
|
+
"checkDuplicate": true,
|
|
95
|
+
"allowedTypes": [],
|
|
96
|
+
"allowedTypeGroups": [
|
|
97
|
+
"VECTOR","BYTE","VARBYTE"
|
|
98
|
+
],
|
|
99
|
+
"requiredLength": 1,
|
|
100
|
+
"matchLengthOfArgument": "",
|
|
101
|
+
"allowPadding": false,
|
|
102
|
+
"name": "VectorColumn",
|
|
103
|
+
"alternateNames": [],
|
|
104
|
+
"isRequired": true,
|
|
105
|
+
"rDescription": "Specifies the column from the input table to be used for training the HNSW model.",
|
|
106
|
+
"description": "Specifies the column from the input table to be used for training the HNSW model.",
|
|
107
|
+
"datatype": "COLUMN_NAMES",
|
|
108
|
+
"allowsLists": false,
|
|
109
|
+
"rName": "vector.column",
|
|
110
|
+
"useInR": true,
|
|
111
|
+
"rOrderNum": 5
|
|
112
|
+
},
|
|
113
|
+
{
|
|
114
|
+
"lowerBound": 1,
|
|
115
|
+
"upperBound": 1024,
|
|
116
|
+
"lowerBoundType": "INCLUSIVE",
|
|
117
|
+
"upperBoundType": "INCLUSIVE",
|
|
118
|
+
"allowNaN": false,
|
|
119
|
+
"name": "NumLayer",
|
|
120
|
+
"alternateNames": [],
|
|
121
|
+
"isRequired": false,
|
|
122
|
+
"rDescription": "Specify the maximum number of layers for the HNSW model.",
|
|
123
|
+
"description": "Specify the maximum number of layers for the HNSW model.",
|
|
124
|
+
"datatype": "INTEGER",
|
|
125
|
+
"allowsLists": false,
|
|
126
|
+
"rName": "num.layer",
|
|
127
|
+
"useInR": true,
|
|
128
|
+
"rOrderNum": 6
|
|
129
|
+
},
|
|
130
|
+
{
|
|
131
|
+
"lowerBound": 0,
|
|
132
|
+
"upperBound": 2147483647,
|
|
133
|
+
"lowerBoundType": "INCLUSIVE",
|
|
134
|
+
"upperBoundType": "INCLUSIVE",
|
|
135
|
+
"allowNaN": false,
|
|
136
|
+
"name": "Seed",
|
|
137
|
+
"alternateNames": [],
|
|
138
|
+
"isRequired": false,
|
|
139
|
+
"rDescription": "Specify the random seed value for repeatable results.",
|
|
140
|
+
"description": "Specify the random seed value for repeatable results.",
|
|
141
|
+
"datatype": "INTEGER",
|
|
142
|
+
"allowsLists": false,
|
|
143
|
+
"rName": "seed",
|
|
144
|
+
"useInR": true,
|
|
145
|
+
"rOrderNum": 7
|
|
146
|
+
},
|
|
147
|
+
{
|
|
148
|
+
"defaultValue": 32,
|
|
149
|
+
"lowerBound": 1,
|
|
150
|
+
"upperBound": 1024,
|
|
151
|
+
"lowerBoundType": "INCLUSIVE",
|
|
152
|
+
"upperBoundType": "INCLUSIVE",
|
|
153
|
+
"allowNaN": false,
|
|
154
|
+
"name": "EfConstruction",
|
|
155
|
+
"alternateNames": [],
|
|
156
|
+
"isRequired": false,
|
|
157
|
+
"rDescription": "Specify the number of neighbors to search during training of HNSW model.",
|
|
158
|
+
"description": "Specify the number of neighbors to search during training of HNSW model.",
|
|
159
|
+
"datatype": "INTEGER",
|
|
160
|
+
"allowsLists": false,
|
|
161
|
+
"rName": "ef.construction",
|
|
162
|
+
"useInR": true,
|
|
163
|
+
"rOrderNum": 8
|
|
164
|
+
},
|
|
165
|
+
{
|
|
166
|
+
"defaultValue": 32,
|
|
167
|
+
"lowerBound": 1,
|
|
168
|
+
"upperBound": 1024,
|
|
169
|
+
"lowerBoundType": "INCLUSIVE",
|
|
170
|
+
"upperBoundType": "INCLUSIVE",
|
|
171
|
+
"allowNaN": false,
|
|
172
|
+
"name": "NumConnPerNode",
|
|
173
|
+
"alternateNames": [],
|
|
174
|
+
"isRequired": false,
|
|
175
|
+
"rDescription": "Specify number of connections for a node during training of HNSW model.",
|
|
176
|
+
"description": "Specify number of connections for a node during training of HNSW model.",
|
|
177
|
+
"datatype": "INTEGER",
|
|
178
|
+
"allowsLists": false,
|
|
179
|
+
"rName": "numconn.pernode",
|
|
180
|
+
"useInR": true,
|
|
181
|
+
"rOrderNum": 9
|
|
182
|
+
},
|
|
183
|
+
{
|
|
184
|
+
"defaultValue": 32,
|
|
185
|
+
"lowerBound": 1,
|
|
186
|
+
"upperBound": 1024,
|
|
187
|
+
"lowerBoundType": "INCLUSIVE",
|
|
188
|
+
"upperBoundType": "INCLUSIVE",
|
|
189
|
+
"allowNaN": false,
|
|
190
|
+
"name": "MaxNumConnPerNode",
|
|
191
|
+
"alternateNames": [],
|
|
192
|
+
"isRequired": false,
|
|
193
|
+
"rDescription": "Specify maximum number of connections allowed for a node during training of HNSW model.",
|
|
194
|
+
"description": "Specify maximum number of connections allowed for a node during training of HNSW model.",
|
|
195
|
+
"datatype": "INTEGER",
|
|
196
|
+
"allowsLists": false,
|
|
197
|
+
"rName": "maxnumconn.pernode",
|
|
198
|
+
"useInR": true,
|
|
199
|
+
"rOrderNum": 10
|
|
200
|
+
},
|
|
201
|
+
{
|
|
202
|
+
"permittedValues": [
|
|
203
|
+
"EUCLIDEAN",
|
|
204
|
+
"COSINE",
|
|
205
|
+
"DOTPRODUCT"
|
|
206
|
+
],
|
|
207
|
+
"defaultValue": "EUCLIDEAN",
|
|
208
|
+
"isOutputColumn": false,
|
|
209
|
+
"matchLengthOfArgument": "",
|
|
210
|
+
"allowPadding": false,
|
|
211
|
+
"name": "DistanceMeasure",
|
|
212
|
+
"alternateNames": [],
|
|
213
|
+
"isRequired": false,
|
|
214
|
+
"rDescription": "Specify the distance measure to be used for distance computation.",
|
|
215
|
+
"description": "Specify the distance measure to be used for distance computation.",
|
|
216
|
+
"datatype": "STRING",
|
|
217
|
+
"allowsLists": false,
|
|
218
|
+
"rName": "distance.measure",
|
|
219
|
+
"useInR": true,
|
|
220
|
+
"rOrderNum": 11
|
|
221
|
+
},
|
|
222
|
+
{
|
|
223
|
+
"lowerBound": 1,
|
|
224
|
+
"upperBound": 4096,
|
|
225
|
+
"lowerBoundType": "INCLUSIVE",
|
|
226
|
+
"upperBoundType": "INCLUSIVE",
|
|
227
|
+
"allowNaN": false,
|
|
228
|
+
"name": "EmbeddingSize",
|
|
229
|
+
"alternateNames": [],
|
|
230
|
+
"isRequired": false,
|
|
231
|
+
"rDescription": "Specify the embedding size of the vectors.",
|
|
232
|
+
"description": "Specify the embedding size of the vectors.",
|
|
233
|
+
"datatype": "INTEGER",
|
|
234
|
+
"allowsLists": false,
|
|
235
|
+
"rName": "embedding.size",
|
|
236
|
+
"useInR": true,
|
|
237
|
+
"rOrderNum": 12
|
|
238
|
+
},
|
|
239
|
+
{
|
|
240
|
+
"permittedValues": [],
|
|
241
|
+
"isOutputColumn": false,
|
|
242
|
+
"matchLengthOfArgument": "",
|
|
243
|
+
"allowPadding": false,
|
|
244
|
+
"defaultValue": false,
|
|
245
|
+
"name": "ApplyHeuristics",
|
|
246
|
+
"alternateNames": [],
|
|
247
|
+
"isRequired": false,
|
|
248
|
+
"rDescription": "Specify whether to apply heuristics optimizations during training of HNSW model.",
|
|
249
|
+
"description": "Specify whether to apply heuristics optimizations during training of HNSW model.",
|
|
250
|
+
"datatype": "BOOLEAN",
|
|
251
|
+
"allowsLists": false,
|
|
252
|
+
"rName": "apply.heuristics",
|
|
253
|
+
"useInR": true,
|
|
254
|
+
"rOrderNum": 13
|
|
255
|
+
},
|
|
256
|
+
{
|
|
257
|
+
"permittedValues": [
|
|
258
|
+
"UPDATE",
|
|
259
|
+
"DELETE"
|
|
260
|
+
],
|
|
261
|
+
"isOutputColumn": false,
|
|
262
|
+
"matchLengthOfArgument": "",
|
|
263
|
+
"allowPadding": false,
|
|
264
|
+
"name": "AlterOperation",
|
|
265
|
+
"alternateNames": [],
|
|
266
|
+
"isRequired": false,
|
|
267
|
+
"rDescription": "Specify the alter operation for HNSW model. This argument is required when InputModelTable is provided.",
|
|
268
|
+
"description": "Specify the alter operation for HNSW model. This argument is required when InputModelTable is provided.",
|
|
269
|
+
"datatype": "STRING",
|
|
270
|
+
"allowsLists": true,
|
|
271
|
+
"rName": "alter.operation",
|
|
272
|
+
"useInR": true,
|
|
273
|
+
"rOrderNum": 14
|
|
274
|
+
},
|
|
275
|
+
{
|
|
276
|
+
"permittedValues": [
|
|
277
|
+
"RECONSTRUCTION",
|
|
278
|
+
"DELETENODE"
|
|
279
|
+
],
|
|
280
|
+
"defaultValue": "RECONSTRUCTION",
|
|
281
|
+
"isOutputColumn": false,
|
|
282
|
+
"matchLengthOfArgument": "",
|
|
283
|
+
"allowPadding": false,
|
|
284
|
+
"name": "DeleteMethod",
|
|
285
|
+
"alternateNames": [],
|
|
286
|
+
"isRequired": false,
|
|
287
|
+
"rDescription": "Specify the method for delete operation.",
|
|
288
|
+
"description": "Specify the method for delete operation.",
|
|
289
|
+
"datatype": "STRING",
|
|
290
|
+
"allowsLists": true,
|
|
291
|
+
"rName": "delete.method",
|
|
292
|
+
"useInR": true,
|
|
293
|
+
"rOrderNum": 15
|
|
294
|
+
}
|
|
295
|
+
]
|
|
296
|
+
}
|
|
@@ -0,0 +1,206 @@
|
|
|
1
|
+
{
|
|
2
|
+
"json_schema_major_version": "1",
|
|
3
|
+
"json_schema_minor_version": "1",
|
|
4
|
+
"json_content_version": "1",
|
|
5
|
+
"function_name": "TD_HNSWPredict",
|
|
6
|
+
"function_version": "1.0",
|
|
7
|
+
"function_type": "fastpath",
|
|
8
|
+
"function_category": "Model Scoring",
|
|
9
|
+
"function_alias_name": "TD_HNSWPredict",
|
|
10
|
+
"function_r_name": "td.hnswpredict",
|
|
11
|
+
"short_description": "The TD_HNSWPredict function takes the InputTable and finds the approximate nearest neighbors for the input data points using the HNSW model generated from TD_HNSW function.",
|
|
12
|
+
"long_description": "The TD_HNSWPredict function takes the InputTable and finds the approximate nearest neighbors for the input data points using the HNSW model generated from TD_HNSW function.",
|
|
13
|
+
"input_tables": [
|
|
14
|
+
{
|
|
15
|
+
"requiredInputKind": [
|
|
16
|
+
"Dimension"
|
|
17
|
+
],
|
|
18
|
+
"isOrdered": false,
|
|
19
|
+
"partitionByOne": false,
|
|
20
|
+
"name": "InputTable",
|
|
21
|
+
"alternateNames": [],
|
|
22
|
+
"isRequired": true,
|
|
23
|
+
"rDescription": "The table that contains the input dataset for HNSW model scoring.",
|
|
24
|
+
"description": "The table that contains the input dataset for HNSW model scoring.",
|
|
25
|
+
"datatype": "TABLE_ALIAS",
|
|
26
|
+
"allowsLists": false,
|
|
27
|
+
"rName": "data",
|
|
28
|
+
"useInR": true,
|
|
29
|
+
"rOrderNum": 2
|
|
30
|
+
},
|
|
31
|
+
{
|
|
32
|
+
"requiredInputKind": [
|
|
33
|
+
"PartitionByAny"
|
|
34
|
+
],
|
|
35
|
+
"isOrdered": false,
|
|
36
|
+
"partitionByOne": false,
|
|
37
|
+
"name": "ModelTable",
|
|
38
|
+
"alternateNames": [],
|
|
39
|
+
"isRequired": true,
|
|
40
|
+
"rDescription": "The table that contains the HNSW model for scoring.",
|
|
41
|
+
"description": "The table that contains the HNSW model for scoring.",
|
|
42
|
+
"datatype": "TABLE_ALIAS",
|
|
43
|
+
"allowsLists": false,
|
|
44
|
+
"rName": "object",
|
|
45
|
+
"useInR": true,
|
|
46
|
+
"rOrderNum": 1
|
|
47
|
+
}
|
|
48
|
+
],
|
|
49
|
+
"argument_clauses": [
|
|
50
|
+
{
|
|
51
|
+
"targetTable": [
|
|
52
|
+
"InputTable"
|
|
53
|
+
],
|
|
54
|
+
"checkDuplicate": true,
|
|
55
|
+
"allowedTypes": [],
|
|
56
|
+
"allowedTypeGroups": [
|
|
57
|
+
"INTEGER"
|
|
58
|
+
],
|
|
59
|
+
"requiredLength": 1,
|
|
60
|
+
"matchLengthOfArgument": "",
|
|
61
|
+
"allowPadding": false,
|
|
62
|
+
"name": "IdColumn",
|
|
63
|
+
"alternateNames": [],
|
|
64
|
+
"isRequired": true,
|
|
65
|
+
"rDescription": "Specify the column name containing unique identifier of input rows.",
|
|
66
|
+
"description": "Specify the column name containing unique identifier of input rows.",
|
|
67
|
+
"datatype": "COLUMN_NAMES",
|
|
68
|
+
"allowsLists": false,
|
|
69
|
+
"rName": "id.column",
|
|
70
|
+
"useInR": true,
|
|
71
|
+
"rOrderNum": 3
|
|
72
|
+
},
|
|
73
|
+
{
|
|
74
|
+
"targetTable": [
|
|
75
|
+
"InputTable"
|
|
76
|
+
],
|
|
77
|
+
"checkDuplicate": true,
|
|
78
|
+
"allowedTypes": [],
|
|
79
|
+
"allowedTypeGroups": [
|
|
80
|
+
"VECTOR","BYTE","VARBYTE"
|
|
81
|
+
],
|
|
82
|
+
"requiredLength": 1,
|
|
83
|
+
"matchLengthOfArgument": "",
|
|
84
|
+
"allowPadding": false,
|
|
85
|
+
"name": "VectorColumn",
|
|
86
|
+
"alternateNames": [],
|
|
87
|
+
"isRequired": true,
|
|
88
|
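+
"rDescription": "Specifies the column from the input table that contains the vectors for which the approximate nearest neighbors are to be found using the HNSW model.",
|
|
89
|
+
"description": "Specifies the column from the input table that contains the vectors for which the approximate nearest neighbors are to be found using the HNSW model.",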
|
|
90
|
+
"datatype": "COLUMN_NAMES",
|
|
91
|
+
"allowsLists": false,
|
|
92
|
+
"rName": "vector.column",
|
|
93
|
+
"useInR": true,
|
|
94
|
+
"rOrderNum": 4
|
|
95
|
+
},
|
|
96
|
+
{
|
|
97
|
+
"defaultValue": 10,
|
|
98
|
+
"lowerBound": 1,
|
|
99
|
+
"upperBound": 1024,
|
|
100
|
+
"lowerBoundType": "INCLUSIVE",
|
|
101
|
+
"upperBoundType": "INCLUSIVE",
|
|
102
|
+
"allowNaN": false,
|
|
103
|
+
"name": "TopK",
|
|
104
|
+
"alternateNames": [],
|
|
105
|
+
"isRequired": false,
|
|
106
|
+
"rDescription": "Specify number of top nearest neighbors to generate in the output.",
|
|
107
|
+
"description": "Specify number of top nearest neighbors to generate in the output.",
|
|
108
|
+
"datatype": "INTEGER",
|
|
109
|
+
"allowsLists": false,
|
|
110
|
+
"rName": "top_k",
|
|
111
|
+
"useInR": true,
|
|
112
|
+
"rOrderNum": 5
|
|
113
|
+
},
|
|
114
|
+
{
|
|
115
|
+
"defaultValue": 32,
|
|
116
|
+
"lowerBound": 1,
|
|
117
|
+
"upperBound": 1024,
|
|
118
|
+
"lowerBoundType": "INCLUSIVE",
|
|
119
|
+
"upperBoundType": "INCLUSIVE",
|
|
120
|
+
"allowNaN": false,
|
|
121
|
+
"name": "EfSearch",
|
|
122
|
+
"alternateNames": [],
|
|
123
|
+
"isRequired": false,
|
|
124
|
+
"rDescription": "Specify the number of neighbors to search during search in HNSW model.",
|
|
125
|
+
"description": "Specify the number of neighbors to search during search in HNSW model.",
|
|
126
|
+
"datatype": "INTEGER",
|
|
127
|
+
"allowsLists": false,
|
|
128
|
+
"rName": "ef.search",
|
|
129
|
+
"useInR": true,
|
|
130
|
+
"rOrderNum": 6
|
|
131
|
+
},
|
|
132
|
+
{
|
|
133
|
+
"permittedValues": [],
|
|
134
|
+
"isOutputColumn": false,
|
|
135
|
+
"matchLengthOfArgument": "",
|
|
136
|
+
"allowPadding": false,
|
|
137
|
+
"defaultValue": false,
|
|
138
|
+
"name": "OutputNearestVector",
|
|
139
|
+
"alternateNames": [],
|
|
140
|
+
"isRequired": false,
|
|
141
|
+
"rDescription": "Specify whether to output the vector for the nearest neighbor.",
|
|
142
|
+
"description": "Specify whether to output the vector for the nearest neighbor.",
|
|
143
|
+
"datatype": "BOOLEAN",
|
|
144
|
+
"allowsLists": false,
|
|
145
|
+
"rName": "output.nearestvector",
|
|
146
|
+
"useInR": true,
|
|
147
|
+
"rOrderNum": 7
|
|
148
|
+
},
|
|
149
|
+
{
|
|
150
|
+
"permittedValues": [],
|
|
151
|
+
"isOutputColumn": false,
|
|
152
|
+
"matchLengthOfArgument": "",
|
|
153
|
+
"allowPadding": false,
|
|
154
|
+
"defaultValue": false,
|
|
155
|
+
"name": "OutputSimilarity",
|
|
156
|
+
"alternateNames": [],
|
|
157
|
+
"isRequired": false,
|
|
158
|
+
"rDescription": "Specify whether to output similarity of input datapoint to the nearest vector. If it is set to false, then the function outputs distance instead of similarity.",
|
|
159
|
+
"description": "Specify whether to output similarity of input datapoint to the nearest vector. If it is set to false, then the function outputs distance instead of similarity.",
|
|
160
|
+
"datatype": "BOOLEAN",
|
|
161
|
+
"allowsLists": false,
|
|
162
|
+
"rName": "output.similarity",
|
|
163
|
+
"useInR": true,
|
|
164
|
+
"rOrderNum": 8
|
|
165
|
+
},
|
|
166
|
+
{
|
|
167
|
+
"targetTable": [
|
|
168
|
+
"InputTable"
|
|
169
|
+
],
|
|
170
|
+
"checkDuplicate": true,
|
|
171
|
+
"allowedTypes": [],
|
|
172
|
+
"allowedTypeGroups": [
|
|
173
|
+
"ALL"
|
|
174
|
+
],
|
|
175
|
+
"matchLengthOfArgument": "",
|
|
176
|
+
"allowPadding": false,
|
|
177
|
+
"name": "Accumulate",
|
|
178
|
+
"alternateNames": [],
|
|
179
|
+
"isRequired": false,
|
|
180
|
+
"rDescription": "Specifies the input table columns to copy to the output table.",
|
|
181
|
+
"description": "Specifies the input table columns to copy to the output table.",
|
|
182
|
+
"datatype": "COLUMNS",
|
|
183
|
+
"allowsLists": true,
|
|
184
|
+
"rName": "accumulate",
|
|
185
|
+
"useInR": true,
|
|
186
|
+
"rOrderNum": 9
|
|
187
|
+
},
|
|
188
|
+
{
|
|
189
|
+
"permittedValues": [],
|
|
190
|
+
"isOutputColumn": false,
|
|
191
|
+
"matchLengthOfArgument": "",
|
|
192
|
+
"allowPadding": false,
|
|
193
|
+
"defaultValue": false,
|
|
194
|
+
"name": "SingleInputRow",
|
|
195
|
+
"alternateNames": [],
|
|
196
|
+
"isRequired": false,
|
|
197
|
+
"rDescription": "Specify whether input data contains only a single row.",
|
|
198
|
+
"description": "Specify whether input data contains only a single row.",
|
|
199
|
+
"datatype": "BOOLEAN",
|
|
200
|
+
"allowsLists": false,
|
|
201
|
+
"rName": "single.inputrow",
|
|
202
|
+
"useInR": true,
|
|
203
|
+
"rOrderNum": 10
|
|
204
|
+
}
|
|
205
|
+
]
|
|
206
|
+
}
|
|
@@ -0,0 +1,32 @@
|
|
|
1
|
+
{
|
|
2
|
+
"json_schema_major_version": "1",
|
|
3
|
+
"json_schema_minor_version": "1",
|
|
4
|
+
"json_content_version": "1",
|
|
5
|
+
"function_name": "TD_HNSWSummary",
|
|
6
|
+
"function_version": "1.0",
|
|
7
|
+
"function_type": "fastpath",
|
|
8
|
+
"function_category": "Model Exploration",
|
|
9
|
+
"function_alias_name": "TD_HNSWSummary",
|
|
10
|
+
"function_r_name": "td.hnswsummary",
|
|
11
|
+
"short_description": "The TD_HNSWSummary function takes the ModelTable generated from TD_HNSW function as input and converts the ModelTable data into readable format.",
|
|
12
|
+
"long_description": "The TD_HNSWSummary function takes the ModelTable generated from TD_HNSW function as input and converts the ModelTable data into readable format.",
|
|
13
|
+
"input_tables": [
|
|
14
|
+
{
|
|
15
|
+
"requiredInputKind": [
|
|
16
|
+
"PartitionByAny"
|
|
17
|
+
],
|
|
18
|
+
"isOrdered": false,
|
|
19
|
+
"partitionByOne": false,
|
|
20
|
+
"name": "ModelTable",
|
|
21
|
+
"alternateNames": [],
|
|
22
|
+
"isRequired": true,
|
|
23
|
+
"rDescription": "The table that contains the HNSW model for exploration.",
|
|
24
|
+
"description": "The table that contains the HNSW model for exploration.",
|
|
25
|
+
"datatype": "TABLE_ALIAS",
|
|
26
|
+
"allowsLists": false,
|
|
27
|
+
"rName": "object",
|
|
28
|
+
"useInR": true,
|
|
29
|
+
"rOrderNum": 1
|
|
30
|
+
}
|
|
31
|
+
]
|
|
32
|
+
}
|
|
@@ -92,7 +92,7 @@
|
|
|
92
92
|
"checkDuplicate": true,
|
|
93
93
|
"allowedTypes": [],
|
|
94
94
|
"allowedTypeGroups": [
|
|
95
|
-
"NUMERIC","
|
|
95
|
+
"NUMERIC","VECTOR","BYTE","VARBYTE"
|
|
96
96
|
],
|
|
97
97
|
"matchLengthOfArgument": "",
|
|
98
98
|
"allowPadding": false,
|
|
@@ -247,4 +247,4 @@
|
|
|
247
247
|
"rOrderNum": 13
|
|
248
248
|
}
|
|
249
249
|
]
|
|
250
|
-
}
|
|
250
|
+
}
|
|
@@ -5,7 +5,7 @@
|
|
|
5
5
|
"function_name": "TD_SMOTE",
|
|
6
6
|
"function_version": "1.0",
|
|
7
7
|
"function_type": "fastpath",
|
|
8
|
-
"function_category": "Feature Engineering Transform
|
|
8
|
+
"function_category": "Feature Engineering Transform",
|
|
9
9
|
"function_alias_name": "TD_SMOTE",
|
|
10
10
|
"function_r_name": "aa.td_smote",
|
|
11
11
|
"ref_function_r_name": "aa.td_smote",
|
|
@@ -168,10 +168,10 @@
|
|
|
168
168
|
},
|
|
169
169
|
{
|
|
170
170
|
"isOutputColumn": false,
|
|
171
|
-
"
|
|
171
|
+
"defaultValue": 5,
|
|
172
172
|
"name": "OversamplingFactor",
|
|
173
173
|
"alternateNames": [],
|
|
174
|
-
"isRequired":
|
|
174
|
+
"isRequired": false,
|
|
175
175
|
"rDescription": "specifies the factor for oversampling the minority class.",
|
|
176
176
|
"description": "specifies the factor for oversampling the minority class.",
|
|
177
177
|
"datatype": "DOUBLE",
|
|
@@ -77,7 +77,7 @@
|
|
|
77
77
|
"checkDuplicate": true,
|
|
78
78
|
"allowedTypes": [],
|
|
79
79
|
"allowedTypeGroups": [
|
|
80
|
-
"NUMERIC","
|
|
80
|
+
"NUMERIC","VECTOR","BYTE","VARBYTE"
|
|
81
81
|
],
|
|
82
82
|
"matchLengthOfArgument": "",
|
|
83
83
|
"allowPadding": false,
|
|
@@ -122,7 +122,7 @@
|
|
|
122
122
|
"checkDuplicate": true,
|
|
123
123
|
"allowedTypes": [],
|
|
124
124
|
"allowedTypeGroups": [
|
|
125
|
-
"NUMERIC","
|
|
125
|
+
"NUMERIC","VECTOR","BYTE","VARBYTE"
|
|
126
126
|
],
|
|
127
127
|
"matchLengthOfArgument": "",
|
|
128
128
|
"allowPadding": false,
|
|
@@ -143,7 +143,7 @@
|
|
|
143
143
|
"EUCLIDEAN",
|
|
144
144
|
"MANHATTAN",
|
|
145
145
|
"DOTPRODUCT",
|
|
146
|
-
"MINKOWSKI"
|
|
146
|
+
"MINKOWSKI"
|
|
147
147
|
],
|
|
148
148
|
"defaultValue": "COSINE",
|
|
149
149
|
"isOutputColumn": false,
|
|
@@ -163,7 +163,7 @@
|
|
|
163
163
|
{
|
|
164
164
|
"defaultValue": 10,
|
|
165
165
|
"lowerBound": 1,
|
|
166
|
-
"upperBound":
|
|
166
|
+
"upperBound": 1024,
|
|
167
167
|
"lowerBoundType": "INCLUSIVE",
|
|
168
168
|
"upperBoundType": "INCLUSIVE",
|
|
169
169
|
"allowNaN": false,
|
|
@@ -203,7 +203,7 @@
|
|
|
203
203
|
"rOrderNum": 9
|
|
204
204
|
},
|
|
205
205
|
{
|
|
206
|
-
"lowerBound":
|
|
206
|
+
"lowerBound": -1.797693e+308,
|
|
207
207
|
"upperBound": 1.797693e+308,
|
|
208
208
|
"lowerBoundType": "INCLUSIVE",
|
|
209
209
|
"upperBoundType": "INCLUSIVE",
|
|
@@ -275,4 +275,4 @@
|
|
|
275
275
|
"rOrderNum": 13
|
|
276
276
|
}
|
|
277
277
|
]
|
|
278
|
-
}
|
|
278
|
+
}
|
|
@@ -0,0 +1,7 @@
|
|
|
1
|
+
"id","txt"
|
|
2
|
+
1,"At end of August, the Janus Unconstrained fund held only 45 debt issues with 70 percent of its assets in U.S. government debt."
|
|
3
|
+
2,"One Treasury issue due June 2016 alone was worth 43 percent of the fund's total assets."
|
|
4
|
+
3,"Most of the bonds have short durations, with the average maturity of just over three years, indicating a generally defensive posture."
|
|
5
|
+
4,"For Bill Gross, quitting Pimco's $222 billion Total Return Fund to take over a $13 million fund at Janus Capital is like resigning the U.S. presidency to become city manager of Ashtabula, Ohio, population 18,800."
|
|
6
|
+
5,"Gross stunned the investing world on Friday with his abrupt departure from Pimco, the $2 trillion asset manager he co-founded in 1971 and where he had run the Total Return Fund, the world's biggest bond fund, for more than 27 years."
|
|
7
|
+
6,"[0-9]+"
|